summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.mailmap6
-rw-r--r--CREDITS15
-rw-r--r--Documentation/ABI/testing/debugfs-pktcdvd18
-rw-r--r--Documentation/ABI/testing/sysfs-class-pktcdvd97
-rw-r--r--Documentation/ABI/testing/sysfs-fs-erofs3
-rw-r--r--Documentation/admin-guide/cgroup-v2.rst15
-rw-r--r--Documentation/admin-guide/hw-vuln/cross-thread-rsb.rst91
-rw-r--r--Documentation/admin-guide/hw-vuln/index.rst1
-rw-r--r--Documentation/admin-guide/mm/zswap.rst4
-rw-r--r--Documentation/arm64/silicon-errata.rst2
-rw-r--r--Documentation/conf.py12
-rw-r--r--Documentation/devicetree/bindings/.gitignore5
-rw-r--r--Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml11
-rw-r--r--Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-aes.yaml2
-rw-r--r--Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-sha.yaml2
-rw-r--r--Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-tdes.yaml2
-rw-r--r--Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml1
-rw-r--r--Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml1
-rw-r--r--Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml2
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm6115-mdss.yaml2
-rw-r--r--Documentation/devicetree/bindings/i2c/renesas,rzv2m.yaml4
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,rpm.yaml24
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml3
-rw-r--r--Documentation/devicetree/bindings/net/marvell,orion-mdio.yaml30
-rw-r--r--Documentation/devicetree/bindings/phy/amlogic,g12a-usb2-phy.yaml (renamed from Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb2-phy.yaml)8
-rw-r--r--Documentation/devicetree/bindings/phy/amlogic,g12a-usb3-pcie-phy.yaml (renamed from Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb3-pcie-phy.yaml)6
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,usb-hs-28nm.yaml1
-rw-r--r--Documentation/devicetree/bindings/regulator/samsung,s2mps14.yaml21
-rw-r--r--Documentation/devicetree/bindings/riscv/cpus.yaml2
-rw-r--r--Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml2
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,apr-services.yaml5
-rw-r--r--[-rwxr-xr-x]Documentation/devicetree/bindings/sound/everest,es8326.yaml0
-rw-r--r--Documentation/devicetree/bindings/sound/mt8186-mt6366-rt1019-rt5682s.yaml1
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,lpass-tx-macro.yaml4
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,lpass-wsa-macro.yaml58
-rw-r--r--Documentation/devicetree/bindings/soundwire/qcom,soundwire.yaml10
-rw-r--r--Documentation/devicetree/bindings/spi/atmel,at91rm9200-spi.yaml2
-rw-r--r--Documentation/devicetree/bindings/spi/atmel,quadspi.yaml2
-rw-r--r--Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml4
-rw-r--r--Documentation/filesystems/erofs.rst2
-rw-r--r--Documentation/filesystems/fscrypt.rst4
-rw-r--r--Documentation/filesystems/fsverity.rst96
-rw-r--r--Documentation/filesystems/locking.rst24
-rw-r--r--Documentation/filesystems/vfs.rst24
-rw-r--r--Documentation/kbuild/makefiles.rst2
-rw-r--r--Documentation/networking/bridge.rst2
-rw-r--r--Documentation/networking/device_drivers/ethernet/intel/ice.rst2
-rw-r--r--Documentation/networking/device_drivers/ethernet/wangxun/txgbe.rst2
-rw-r--r--Documentation/networking/nf_conntrack-sysctl.rst10
-rw-r--r--Documentation/networking/rxrpc.rst4
-rw-r--r--Documentation/process/maintainer-netdev.rst369
-rw-r--r--Documentation/sphinx/load_config.py6
-rw-r--r--Documentation/virt/kvm/api.rst32
-rw-r--r--Documentation/virt/kvm/locking.rst25
-rw-r--r--Documentation/x86/amd-memory-encryption.rst36
-rw-r--r--MAINTAINERS119
-rw-r--r--Makefile21
-rw-r--r--arch/arm/Makefile2
-rw-r--r--arch/arm/boot/dts/armada-38x.dtsi4
-rw-r--r--arch/arm/boot/dts/armada-39x.dtsi4
-rw-r--r--arch/arm/boot/dts/aspeed-bmc-ibm-bonnell.dts2
-rw-r--r--arch/arm/boot/dts/imx53-ppd.dts2
-rw-r--r--arch/arm/boot/dts/imx6qdl-gw560x.dtsi1
-rw-r--r--arch/arm/boot/dts/imx6ul-pico-dwarf.dts2
-rw-r--r--arch/arm/boot/dts/imx7d-pico-dwarf.dts4
-rw-r--r--arch/arm/boot/dts/imx7d-pico-nymph.dts4
-rw-r--r--arch/arm/boot/dts/imx7d-smegw01.dts3
-rw-r--r--arch/arm/boot/dts/nuvoton-wpcm450.dtsi1
-rw-r--r--arch/arm/boot/dts/qcom-apq8084-ifc6540.dts20
-rw-r--r--arch/arm/boot/dts/qcom-apq8084.dtsi4
-rw-r--r--arch/arm/boot/dts/rk3288.dtsi1
-rw-r--r--arch/arm/boot/dts/sam9x60.dtsi2
-rw-r--r--arch/arm/boot/dts/stihxxx-b2120.dtsi2
-rw-r--r--arch/arm/boot/dts/stm32mp151a-prtt1l.dtsi8
-rw-r--r--arch/arm/boot/dts/stm32mp157c-emstamp-argon.dtsi8
-rw-r--r--arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi8
-rw-r--r--arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi8
-rw-r--r--arch/arm/boot/dts/vf610-zii-dev-rev-b.dts2
-rw-r--r--arch/arm/boot/dts/vf610-zii-dev-rev-c.dts2
-rw-r--r--arch/arm/crypto/Makefile7
-rw-r--r--arch/arm/include/asm/thread_info.h13
-rw-r--r--arch/arm/kernel/sys_oabi-compat.c1
-rw-r--r--arch/arm/mach-footbridge/isa-rtc.c1
-rw-r--r--arch/arm/mach-imx/cpu-imx25.c1
-rw-r--r--arch/arm/mach-imx/cpu-imx27.c1
-rw-r--r--arch/arm/mach-imx/cpu-imx31.c1
-rw-r--r--arch/arm/mach-imx/cpu-imx35.c1
-rw-r--r--arch/arm/mach-imx/cpu-imx5.c1
-rw-r--r--arch/arm/mach-omap1/Kconfig5
-rw-r--r--arch/arm/mach-omap1/Makefile4
-rw-r--r--arch/arm/mach-omap1/gpio15xx.c1
-rw-r--r--arch/arm/mach-omap1/io.c32
-rw-r--r--arch/arm/mach-omap1/mcbsp.c21
-rw-r--r--arch/arm/mach-omap1/pm.h7
-rw-r--r--arch/arm/mach-pxa/Kconfig2
-rw-r--r--arch/arm/mm/nommu.c2
-rw-r--r--arch/arm/mm/proc-macros.S1
-rw-r--r--arch/arm64/Kconfig18
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-axg.dtsi4
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi6
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gx.dtsi6
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-sm1-odroid-hc4.dts8
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1012a-qds.dts2
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1043a-qds.dts2
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1046a-qds.dts2
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1088a-qds.dts2
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1088a-rdb.dts2
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1088a-ten64.dts2
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls208xa-qds.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls208xa-rdb.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-lx2160a-cex7.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8dxl.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-beacon-baseboard.dtsi4
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-data-modul-edm-sbc.dts3
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-nitrogen-r2.dts2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-pinfunc.h2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx-0x-rs232-rts.dtso1
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx-0x-rs232-rts.dtso1
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi1
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-venice-gw7901.dts4
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-venice-gw7902.dts3
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dts1
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-verdin-dahlia.dtsi1
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-verdin-dev.dtsi1
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi1
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mn-venice-gw7902.dts1
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-evk.dts4
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-phycore-som.dtsi10
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts1
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp.dtsi15
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mq-nitrogen.dts4
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mq-thor96.dts4
-rw-r--r--arch/arm64/boot/dts/freescale/imx8qxp-mek.dts2
-rw-r--r--arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts6
-rw-r--r--arch/arm64/boot/dts/marvell/ac5-98dx25xx.dtsi2
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8195.dtsi4
-rw-r--r--arch/arm64/boot/dts/qcom/msm8992-lg-bullhead.dtsi6
-rw-r--r--arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts77
-rw-r--r--arch/arm64/boot/dts/qcom/msm8992.dtsi4
-rw-r--r--arch/arm64/boot/dts/qcom/msm8994-huawei-angler-rev-101.dts19
-rw-r--r--arch/arm64/boot/dts/qcom/sc8280xp.dtsi79
-rw-r--r--arch/arm64/boot/dts/qcom/sm8250.dtsi1
-rw-r--r--arch/arm64/boot/dts/qcom/sm8350.dtsi4
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-op1-opp.dtsi2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-pinephone-pro.dts7
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399.dtsi6
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts11
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts5
-rw-r--r--arch/arm64/boot/dts/rockchip/rk356x.dtsi1
-rw-r--r--arch/arm64/crypto/sm4-ce-ccm-core.S5
-rw-r--r--arch/arm64/crypto/sm4-ce-gcm-core.S5
-rw-r--r--arch/arm64/include/asm/atomic_ll_sc.h2
-rw-r--r--arch/arm64/include/asm/atomic_lse.h2
-rw-r--r--arch/arm64/include/asm/cputype.h4
-rw-r--r--arch/arm64/include/asm/efi.h9
-rw-r--r--arch/arm64/include/asm/esr.h9
-rw-r--r--arch/arm64/include/asm/hugetlb.h9
-rw-r--r--arch/arm64/include/asm/kvm_arm.h15
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h42
-rw-r--r--arch/arm64/include/asm/pgtable.h16
-rw-r--r--arch/arm64/include/asm/stacktrace.h15
-rw-r--r--arch/arm64/include/asm/uprobes.h2
-rw-r--r--arch/arm64/kernel/cpu_errata.c7
-rw-r--r--arch/arm64/kernel/efi-rt-wrapper.S7
-rw-r--r--arch/arm64/kernel/efi.c3
-rw-r--r--arch/arm64/kernel/elfcore.c61
-rw-r--r--arch/arm64/kernel/fpsimd.c2
-rw-r--r--arch/arm64/kernel/perf_event.c15
-rw-r--r--arch/arm64/kernel/ptrace.c2
-rw-r--r--arch/arm64/kernel/signal.c9
-rw-r--r--arch/arm64/kernel/stacktrace.c12
-rw-r--r--arch/arm64/kvm/guest.c2
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/fault.h2
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h2
-rw-r--r--arch/arm64/kvm/mmu.c21
-rw-r--r--arch/arm64/kvm/sys_regs.c2
-rw-r--r--arch/arm64/kvm/vgic/vgic-its.c13
-rw-r--r--arch/arm64/kvm/vgic/vgic-v3.c31
-rw-r--r--arch/arm64/kvm/vgic/vgic-v4.c8
-rw-r--r--arch/arm64/kvm/vgic/vgic.h15
-rw-r--r--arch/arm64/mm/hugetlbpage.c21
-rw-r--r--arch/arm64/mm/mmu.c21
-rw-r--r--arch/arm64/tools/cpucaps1
-rw-r--r--arch/ia64/kernel/elfcore.c4
-rw-r--r--arch/ia64/kernel/sys_ia64.c7
-rw-r--r--arch/loongarch/include/asm/ftrace.h2
-rw-r--r--arch/loongarch/include/asm/inst.h9
-rw-r--r--arch/loongarch/include/asm/unwind.h41
-rw-r--r--arch/loongarch/kernel/Makefile2
-rw-r--r--arch/loongarch/kernel/alternative.c6
-rw-r--r--arch/loongarch/kernel/cpu-probe.c2
-rw-r--r--arch/loongarch/kernel/genex.S3
-rw-r--r--arch/loongarch/kernel/inst.c45
-rw-r--r--arch/loongarch/kernel/process.c12
-rw-r--r--arch/loongarch/kernel/traps.c3
-rw-r--r--arch/loongarch/kernel/unwind.c32
-rw-r--r--arch/loongarch/kernel/unwind_guess.c49
-rw-r--r--arch/loongarch/kernel/unwind_prologue.c252
-rw-r--r--arch/loongarch/mm/tlb.c2
-rw-r--r--arch/mips/ralink/of.c2
-rw-r--r--arch/parisc/kernel/firmware.c5
-rw-r--r--arch/parisc/kernel/ptrace.c21
-rw-r--r--arch/powerpc/Kconfig1
-rw-r--r--arch/powerpc/boot/dts/fsl/t2081si-post.dtsi16
-rwxr-xr-xarch/powerpc/boot/wrapper4
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush.h2
-rw-r--r--arch/powerpc/include/asm/hw_irq.h43
-rw-r--r--arch/powerpc/include/asm/imc-pmu.h2
-rw-r--r--arch/powerpc/kernel/dbell.c2
-rw-r--r--arch/powerpc/kernel/head_85xx.S3
-rw-r--r--arch/powerpc/kernel/interrupt.c6
-rw-r--r--arch/powerpc/kernel/irq.c2
-rw-r--r--arch/powerpc/kernel/time.c2
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S6
-rw-r--r--arch/powerpc/kexec/file_load_64.c12
-rw-r--r--arch/powerpc/kvm/booke.c5
-rw-r--r--arch/powerpc/mm/book3s64/hash_utils.c2
-rw-r--r--arch/powerpc/mm/book3s64/radix_pgtable.c24
-rw-r--r--arch/powerpc/perf/imc-pmu.c122
-rw-r--r--arch/powerpc/platforms/cell/spufs/inode.c8
-rw-r--r--arch/riscv/Makefile3
-rw-r--r--arch/riscv/boot/dts/sifive/fu740-c000.dtsi2
-rw-r--r--arch/riscv/include/asm/alternative-macros.h2
-rw-r--r--arch/riscv/include/asm/hwcap.h3
-rw-r--r--arch/riscv/include/asm/pgtable.h4
-rw-r--r--arch/riscv/include/asm/uaccess.h2
-rw-r--r--arch/riscv/include/asm/vdso/processor.h28
-rw-r--r--arch/riscv/kernel/head.S2
-rw-r--r--arch/riscv/kernel/probes/kprobes.c26
-rw-r--r--arch/riscv/kernel/probes/simulate-insn.c4
-rw-r--r--arch/riscv/kernel/probes/simulate-insn.h4
-rw-r--r--arch/riscv/kernel/smpboot.c3
-rw-r--r--arch/riscv/kernel/stacktrace.c3
-rw-r--r--arch/riscv/mm/cacheflush.c4
-rw-r--r--arch/riscv/mm/pgtable.c20
-rw-r--r--arch/s390/boot/decompressor.c6
-rw-r--r--arch/s390/configs/debug_defconfig7
-rw-r--r--arch/s390/configs/defconfig6
-rw-r--r--arch/s390/configs/zfcpdump_defconfig2
-rw-r--r--arch/s390/include/asm/cpu_mf.h31
-rw-r--r--arch/s390/include/asm/debug.h6
-rw-r--r--arch/s390/include/asm/percpu.h2
-rw-r--r--arch/s390/kernel/machine_kexec_file.c5
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c101
-rw-r--r--arch/s390/kernel/setup.c5
-rw-r--r--arch/s390/kernel/vmlinux.lds.S4
-rw-r--r--arch/s390/kvm/interrupt.c12
-rw-r--r--arch/sh/include/asm/pgtable-3level.h2
-rw-r--r--arch/sh/kernel/vmlinux.lds.S1
-rw-r--r--arch/x86/Makefile2
-rw-r--r--arch/x86/boot/bioscall.S4
-rw-r--r--arch/x86/boot/compressed/ident_map_64.c6
-rw-r--r--arch/x86/boot/compressed/misc.h2
-rw-r--r--arch/x86/boot/compressed/sev.c70
-rw-r--r--arch/x86/coco/tdx/tdx.c26
-rw-r--r--arch/x86/events/core.c12
-rw-r--r--arch/x86/events/intel/core.c1
-rw-r--r--arch/x86/events/intel/cstate.c22
-rw-r--r--arch/x86/events/intel/uncore.c1
-rw-r--r--arch/x86/events/msr.c3
-rw-r--r--arch/x86/events/rapl.c5
-rw-r--r--arch/x86/include/asm/acpi.h8
-rw-r--r--arch/x86/include/asm/cpufeatures.h1
-rw-r--r--arch/x86/include/asm/debugreg.h26
-rw-r--r--arch/x86/include/asm/insn-eval.h18
-rw-r--r--arch/x86/include/asm/intel-family.h2
-rw-r--r--arch/x86/include/asm/kvm_host.h1
-rw-r--r--arch/x86/include/asm/msr-index.h20
-rw-r--r--arch/x86/include/uapi/asm/svm.h6
-rw-r--r--arch/x86/kernel/cpu/aperfmperf.c9
-rw-r--r--arch/x86/kernel/cpu/bugs.c2
-rw-r--r--arch/x86/kernel/cpu/common.c9
-rw-r--r--arch/x86/kernel/cpu/resctrl/monitor.c49
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c12
-rw-r--r--arch/x86/kernel/crash.c4
-rw-r--r--arch/x86/kernel/i8259.c1
-rw-r--r--arch/x86/kernel/irqinit.c4
-rw-r--r--arch/x86/kernel/kprobes/core.c2
-rw-r--r--arch/x86/kernel/sev.c18
-rw-r--r--arch/x86/kvm/cpuid.c32
-rw-r--r--arch/x86/kvm/pmu.h26
-rw-r--r--arch/x86/kvm/svm/nested.c12
-rw-r--r--arch/x86/kvm/vmx/vmx.c21
-rw-r--r--arch/x86/kvm/x86.c46
-rw-r--r--arch/x86/kvm/xen.c90
-rw-r--r--arch/x86/lib/insn-eval.c20
-rw-r--r--arch/x86/lib/iomap_copy_64.S2
-rw-r--r--arch/x86/mm/init.c4
-rw-r--r--arch/x86/pci/mmconfig-shared.c44
-rw-r--r--arch/x86/pci/xen.c2
-rw-r--r--arch/x86/um/elfcore.c4
-rw-r--r--arch/x86/xen/p2m.c5
-rw-r--r--arch/xtensa/include/asm/processor.h9
-rw-r--r--arch/xtensa/kernel/traps.c2
-rw-r--r--arch/xtensa/mm/fault.c4
-rw-r--r--block/Kconfig1
-rw-r--r--block/bfq-cgroup.c10
-rw-r--r--block/bfq-iosched.c4
-rw-r--r--block/bfq-iosched.h2
-rw-r--r--block/bio.c37
-rw-r--r--block/blk-cgroup.c8
-rw-r--r--block/blk-core.c3
-rw-r--r--block/blk-merge.c17
-rw-r--r--block/blk-mq.c16
-rw-r--r--block/genhd.c11
-rw-r--r--certs/Makefile4
-rw-r--r--certs/blacklist.c21
-rw-r--r--crypto/asymmetric_keys/Kconfig2
-rw-r--r--crypto/asymmetric_keys/pkcs7_verify.c1
-rw-r--r--crypto/asymmetric_keys/public_key.c24
-rw-r--r--drivers/accessibility/speakup/spk_ttyio.c3
-rw-r--r--drivers/acpi/glue.c14
-rw-r--r--drivers/acpi/nfit/core.c2
-rw-r--r--drivers/acpi/prmt.c10
-rw-r--r--drivers/acpi/resource.c7
-rw-r--r--drivers/acpi/scan.c7
-rw-r--r--drivers/acpi/sleep.c6
-rw-r--r--drivers/acpi/video_detect.c61
-rw-r--r--drivers/android/binderfs.c4
-rw-r--r--drivers/ata/Kconfig1
-rw-r--r--drivers/ata/ahci.c1
-rw-r--r--drivers/ata/libata-core.c5
-rw-r--r--drivers/ata/pata_octeon_cf.c16
-rw-r--r--drivers/base/devtmpfs.c12
-rw-r--r--drivers/base/property.c18
-rw-r--r--drivers/base/test/test_async_driver_probe.c2
-rw-r--r--drivers/block/Kconfig43
-rw-r--r--drivers/block/Makefile1
-rw-r--r--drivers/block/drbd/drbd_req.c2
-rw-r--r--drivers/block/pktcdvd.c2946
-rw-r--r--drivers/block/ps3vram.c2
-rw-r--r--drivers/block/rnbd/rnbd-clt.c2
-rw-r--r--drivers/block/ublk_drv.c12
-rw-r--r--drivers/block/virtio_blk.c35
-rw-r--r--drivers/block/xen-blkback/xenbus.c4
-rw-r--r--drivers/block/xen-blkfront.c3
-rw-r--r--drivers/bluetooth/hci_qca.c7
-rw-r--r--drivers/bus/sunxi-rsb.c8
-rw-r--r--drivers/char/tpm/eventlog/acpi.c5
-rw-r--r--drivers/char/tpm/eventlog/efi.c13
-rw-r--r--drivers/char/tpm/eventlog/of.c35
-rw-r--r--drivers/char/tpm/st33zp24/i2c.c5
-rw-r--r--drivers/char/tpm/tpm-chip.c1
-rw-r--r--drivers/char/tpm/tpm-interface.c4
-rw-r--r--drivers/char/tpm/tpm2-cmd.c4
-rw-r--r--drivers/char/tpm/tpm_crb.c100
-rw-r--r--drivers/char/tpm/tpm_i2c_atmel.c5
-rw-r--r--drivers/char/tpm/tpm_i2c_infineon.c5
-rw-r--r--drivers/char/tpm/tpm_i2c_nuvoton.c6
-rw-r--r--drivers/char/tpm/tpm_tis_i2c.c5
-rw-r--r--drivers/char/tpm/xen-tpmfront.c3
-rw-r--r--drivers/clk/ingenic/jz4760-cgu.c18
-rw-r--r--drivers/clk/microchip/clk-mpfs-ccc.c10
-rw-r--r--drivers/comedi/drivers/adv_pci1760.c2
-rw-r--r--drivers/cpufreq/amd-pstate.c1
-rw-r--r--drivers/cpufreq/apple-soc-cpufreq.c3
-rw-r--r--drivers/cpufreq/armada-37xx-cpufreq.c2
-rw-r--r--drivers/cpufreq/cppc_cpufreq.c11
-rw-r--r--drivers/cpufreq/cpufreq-dt-platdev.c2
-rw-r--r--drivers/cpufreq/qcom-cpufreq-hw.c48
-rw-r--r--drivers/crypto/atmel-ecc.c4
-rw-r--r--drivers/crypto/atmel-i2c.c4
-rw-r--r--drivers/crypto/atmel-i2c.h2
-rw-r--r--drivers/crypto/caam/blob_gen.c2
-rw-r--r--drivers/crypto/virtio/virtio_crypto_skcipher_algs.c3
-rw-r--r--drivers/cxl/acpi.c1
-rw-r--r--drivers/cxl/core/pmem.c42
-rw-r--r--drivers/cxl/core/region.c12
-rw-r--r--drivers/cxl/pci.c7
-rw-r--r--drivers/cxl/pmem.c24
-rw-r--r--drivers/dax/super.c2
-rw-r--r--drivers/dma-buf/dma-buf-sysfs-stats.c7
-rw-r--r--drivers/dma-buf/dma-buf-sysfs-stats.h4
-rw-r--r--drivers/dma-buf/dma-buf.c82
-rw-r--r--drivers/dma-buf/dma-fence.c2
-rw-r--r--drivers/dma/dmaengine.c7
-rw-r--r--drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c6
-rw-r--r--drivers/dma/idxd/device.c16
-rw-r--r--drivers/dma/imx-sdma.c4
-rw-r--r--drivers/dma/lgm/lgm-dma.c10
-rw-r--r--drivers/dma/ptdma/ptdma-dev.c7
-rw-r--r--drivers/dma/ptdma/ptdma.h2
-rw-r--r--drivers/dma/qcom/gpi.c1
-rw-r--r--drivers/dma/tegra186-gpc-dma.c1
-rw-r--r--drivers/dma/tegra210-adma.c2
-rw-r--r--drivers/dma/ti/k3-udma.c5
-rw-r--r--drivers/dma/xilinx/xilinx_dma.c4
-rw-r--r--drivers/edac/edac_device.c30
-rw-r--r--drivers/edac/edac_module.h2
-rw-r--r--drivers/edac/highbank_mc_edac.c7
-rw-r--r--drivers/edac/qcom_edac.c5
-rw-r--r--drivers/firewire/core-cdev.c4
-rw-r--r--drivers/firmware/arm_scmi/driver.c2
-rw-r--r--drivers/firmware/arm_scmi/shmem.c9
-rw-r--r--drivers/firmware/arm_scmi/virtio.c7
-rw-r--r--drivers/firmware/efi/efi.c11
-rw-r--r--drivers/firmware/efi/libstub/arm64.c9
-rw-r--r--drivers/firmware/efi/memattr.c2
-rw-r--r--drivers/firmware/efi/runtime-wrappers.c1
-rw-r--r--drivers/firmware/google/coreboot_table.c9
-rw-r--r--drivers/firmware/google/coreboot_table.h1
-rw-r--r--drivers/firmware/google/gsmi.c7
-rw-r--r--drivers/firmware/psci/psci.c3
-rw-r--r--drivers/fpga/intel-m10-bmc-sec-update.c17
-rw-r--r--drivers/fpga/stratix10-soc.c4
-rw-r--r--drivers/gpio/Kconfig1
-rw-r--r--drivers/gpio/gpio-eic-sprd.c23
-rw-r--r--drivers/gpio/gpio-ep93xx.c38
-rw-r--r--drivers/gpio/gpio-mxc.c16
-rw-r--r--drivers/gpio/gpio-pca953x.c3
-rw-r--r--drivers/gpio/gpio-pmic-eic-sprd.c29
-rw-r--r--drivers/gpio/gpio-sifive.c1
-rw-r--r--drivers/gpio/gpio-sim.c2
-rw-r--r--drivers/gpio/gpio-sprd.c9
-rw-r--r--drivers/gpio/gpio-vf610.c41
-rw-r--r--drivers/gpio/gpiolib-acpi.c32
-rw-r--r--drivers/gpio/gpiolib-acpi.h1
-rw-r--r--drivers/gpio/gpiolib.c2
-rw-r--r--drivers/gpu/drm/Kconfig3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c54
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c44
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/imu_v11_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v11_0.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc21.c5
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.c9
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c2
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c117
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c51
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link.c14
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c6
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c12
-rw-r--r--drivers/gpu/drm/amd/pm/amdgpu_pm.c8
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c14
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h5
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h29
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h4
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c7
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c8
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c8
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c2
-rw-r--r--drivers/gpu/drm/ast/ast_mode.c4
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c1
-rw-r--r--drivers/gpu/drm/display/drm_dp_mst_topology.c4
-rw-r--r--drivers/gpu/drm/drm_buddy.c81
-rw-r--r--drivers/gpu/drm/drm_client.c33
-rw-r--r--drivers/gpu/drm/drm_fb_helper.c7
-rw-r--r--drivers/gpu/drm/drm_fbdev_generic.c15
-rw-r--r--drivers/gpu/drm/drm_panel_orientation_quirks.c6
-rw-r--r--drivers/gpu/drm/drm_vma_manager.c76
-rw-r--r--drivers/gpu/drm/i915/display/intel_bios.c33
-rw-r--r--drivers/gpu/drm/i915/display/intel_cdclk.c2
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbdev.c12
-rw-r--r--drivers/gpu/drm/i915/display/skl_universal_plane.c2
-rw-r--r--drivers/gpu/drm/i915/display/skl_watermark.c3
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_context.c38
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c14
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_mman.c2
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_shmem.c2
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_tiling.c9
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/huge_pages.c8
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.c4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.h3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine.h4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c74
-rw-r--r--drivers/gpu/drm/i915/gt/intel_execlists_submission.c27
-rw-r--r--drivers/gpu/drm/i915/gt/intel_execlists_submission.h4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_regs.h14
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c37
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.c34
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.c28
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c14
-rw-r--r--drivers/gpu/drm/i915/gvt/debugfs.c36
-rw-r--r--drivers/gpu/drm/i915/gvt/dmabuf.c3
-rw-r--r--drivers/gpu/drm/i915/gvt/gtt.c21
-rw-r--r--drivers/gpu/drm/i915/gvt/gvt.h15
-rw-r--r--drivers/gpu/drm/i915/gvt/interrupt.c2
-rw-r--r--drivers/gpu/drm/i915/gvt/kvmgt.c35
-rw-r--r--drivers/gpu/drm/i915/gvt/scheduler.c4
-rw-r--r--drivers/gpu/drm/i915/gvt/vgpu.c12
-rw-r--r--drivers/gpu/drm/i915/i915_driver.c5
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c33
-rw-r--r--drivers/gpu/drm/i915/i915_pci.c3
-rw-r--r--drivers/gpu/drm/i915/i915_switcheroo.c6
-rw-r--r--drivers/gpu/drm/i915/i915_vma.c2
-rw-r--r--drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c3
-rw-r--r--drivers/gpu/drm/imx/ipuv3-plane.c14
-rw-r--r--drivers/gpu/drm/meson/meson_viu.c5
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gmu.c15
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu.c7
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu.h1
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_device.c5
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_gpu.c4
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_gpu.h10
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c3
-rw-r--r--drivers/gpu/drm/msm/dp/dp_aux.c4
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi.c12
-rw-r--r--drivers/gpu/drm/msm/msm_drv.c2
-rw-r--r--drivers/gpu/drm/msm/msm_gpu.c2
-rw-r--r--drivers/gpu/drm/msm/msm_gpu.h12
-rw-r--r--drivers/gpu/drm/msm/msm_mdss.c6
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h1
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_fbcon.c613
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/firmware.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/base.c10
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c14
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c23
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga102.c8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/tu102.c55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c2
-rw-r--r--drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c16
-rw-r--r--drivers/gpu/drm/panfrost/Kconfig3
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_drv.c27
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_gem.c16
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_gem.h5
-rw-r--r--drivers/gpu/drm/scheduler/sched_entity.c2
-rw-r--r--drivers/gpu/drm/scheduler/sched_main.c4
-rw-r--r--drivers/gpu/drm/solomon/ssd130x.c18
-rw-r--r--drivers/gpu/drm/tests/Makefile2
-rw-r--r--drivers/gpu/drm/tests/drm_mm_test.c6
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo_util.c2
-rw-r--r--drivers/gpu/drm/vc4/vc4_bo.c6
-rw-r--r--drivers/gpu/drm/vc4/vc4_crtc.c2
-rw-r--r--drivers/gpu/drm/vc4/vc4_hdmi.c21
-rw-r--r--drivers/gpu/drm/vc4/vc4_plane.c6
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_ioctl.c24
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_object.c6
-rw-r--r--drivers/gpu/drm/vmwgfx/ttm_object.c41
-rw-r--r--drivers/gpu/drm/vmwgfx/ttm_object.h14
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_bo.c50
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_drv.h18
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c178
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_gem.c8
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_kms.c4
-rw-r--r--[-rwxr-xr-x]drivers/gpu/drm/vmwgfx/vmwgfx_msg_arm64.h0
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c1
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_resource.c33
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_shader.c1
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_surface.c10
-rw-r--r--drivers/gpu/drm/xen/xen_drm_front.c3
-rw-r--r--drivers/hid/amd-sfh-hid/amd_sfh_client.c15
-rw-r--r--drivers/hid/amd-sfh-hid/amd_sfh_hid.h1
-rw-r--r--drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c2
-rw-r--r--drivers/hid/hid-betopff.c17
-rw-r--r--drivers/hid/hid-bigbenff.c5
-rw-r--r--drivers/hid/hid-core.c7
-rw-r--r--drivers/hid/hid-elecom.c16
-rw-r--r--drivers/hid/hid-ids.h7
-rw-r--r--drivers/hid/hid-input.c4
-rw-r--r--drivers/hid/hid-logitech-hidpp.c3
-rw-r--r--drivers/hid/hid-playstation.c63
-rw-r--r--drivers/hid/hid-quirks.c4
-rw-r--r--drivers/hid/hid-uclogic-core.c2
-rw-r--r--drivers/hid/hid-uclogic-params.c2
-rw-r--r--drivers/hid/intel-ish-hid/ishtp/dma-if.c10
-rw-r--r--drivers/hv/hv_balloon.c2
-rw-r--r--drivers/i2c/busses/i2c-axxia.c2
-rw-r--r--drivers/i2c/busses/i2c-designware-common.c9
-rw-r--r--drivers/i2c/busses/i2c-designware-pcidrv.c2
-rw-r--r--drivers/i2c/busses/i2c-designware-platdrv.c20
-rw-r--r--drivers/i2c/busses/i2c-mxs.c4
-rw-r--r--drivers/i2c/busses/i2c-rk3x.c44
-rw-r--r--drivers/iio/accel/hid-sensor-accel-3d.c1
-rw-r--r--drivers/iio/adc/berlin2-adc.c4
-rw-r--r--drivers/iio/adc/imx8qxp-adc.c11
-rw-r--r--drivers/iio/adc/stm32-dfsdm-adc.c1
-rw-r--r--drivers/iio/adc/twl6030-gpadc.c32
-rw-r--r--drivers/iio/adc/xilinx-ams.c2
-rw-r--r--drivers/iio/gyro/hid-sensor-gyro-3d.c1
-rw-r--r--drivers/iio/imu/fxos8700_core.c111
-rw-r--r--drivers/iio/imu/st_lsm6dsx/Kconfig1
-rw-r--r--drivers/iio/light/cm32181.c9
-rw-r--r--drivers/infiniband/core/umem_dmabuf.c8
-rw-r--r--drivers/infiniband/core/verbs.c7
-rw-r--r--drivers/infiniband/hw/hfi1/file_ops.c7
-rw-r--r--drivers/infiniband/hw/hfi1/user_exp_rcv.c209
-rw-r--r--drivers/infiniband/hw/hfi1/user_exp_rcv.h3
-rw-r--r--drivers/infiniband/hw/irdma/cm.c3
-rw-r--r--drivers/infiniband/hw/mana/qp.c2
-rw-r--r--drivers/infiniband/hw/mlx5/counters.c6
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c49
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom.c8
-rw-r--r--drivers/infiniband/sw/rxe/rxe_param.h10
-rw-r--r--drivers/infiniband/sw/rxe/rxe_pool.c22
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c8
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c3
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.h8
-rw-r--r--drivers/input/misc/xen-kbdfront.c5
-rw-r--r--drivers/input/mouse/synaptics.c1
-rw-r--r--drivers/input/serio/i8042-acpipnpio.h7
-rw-r--r--drivers/interconnect/qcom/icc-rpm.c2
-rw-r--r--drivers/interconnect/qcom/msm8996.c19
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c4
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu.c32
-rw-r--r--drivers/iommu/iommu.c8
-rw-r--r--drivers/iommu/iova.c4
-rw-r--r--drivers/iommu/mtk_iommu_v1.c4
-rw-r--r--drivers/md/bcache/bcache_ondisk.h3
-rw-r--r--drivers/md/bcache/journal.c3
-rw-r--r--drivers/md/dm.c2
-rw-r--r--drivers/md/md.c6
-rw-r--r--drivers/media/common/videobuf2/videobuf2-core.c5
-rw-r--r--drivers/media/v4l2-core/v4l2-ctrls-api.c2
-rw-r--r--drivers/memory/atmel-sdramc.c6
-rw-r--r--drivers/memory/mvebu-devbus.c3
-rw-r--r--drivers/memory/omap-gpmc.c3
-rw-r--r--drivers/memory/tegra/tegra186.c36
-rw-r--r--drivers/misc/fastrpc.c83
-rw-r--r--drivers/misc/mei/bus.c12
-rw-r--r--drivers/misc/mei/hw-me-regs.h2
-rw-r--r--drivers/misc/mei/pci-me.c2
-rw-r--r--drivers/misc/vmw_vmci/vmci_guest.c49
-rw-r--r--drivers/mmc/core/sdio_bus.c17
-rw-r--r--drivers/mmc/core/sdio_cis.c12
-rw-r--r--drivers/mmc/host/jz4740_mmc.c10
-rw-r--r--drivers/mmc/host/meson-gx-mmc.c23
-rw-r--r--drivers/mmc/host/mmc_spi.c8
-rw-r--r--drivers/mmc/host/sdhci-esdhc-imx.c22
-rw-r--r--drivers/mmc/host/sunxi-mmc.c8
-rw-r--r--drivers/mtd/parsers/scpart.c2
-rw-r--r--drivers/mtd/parsers/tplink_safeloader.c4
-rw-r--r--drivers/mtd/spi-nor/core.c1
-rw-r--r--drivers/net/bonding/bond_3ad.c1
-rw-r--r--drivers/net/bonding/bond_debugfs.c2
-rw-r--r--drivers/net/bonding/bond_main.c8
-rw-r--r--drivers/net/can/spi/mcp251xfd/mcp251xfd-ethtool.c1
-rw-r--r--drivers/net/dsa/Kconfig7
-rw-r--r--drivers/net/dsa/microchip/ksz9477.c4
-rw-r--r--drivers/net/dsa/microchip/ksz9477_i2c.c2
-rw-r--r--drivers/net/dsa/mt7530.c26
-rw-r--r--drivers/net/dsa/mv88e6xxx/Kconfig4
-rw-r--r--drivers/net/dsa/qca/qca8k-8xxx.c164
-rw-r--r--drivers/net/dsa/qca/qca8k.h5
-rw-r--r--drivers/net/ethernet/adi/adin1110.c2
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_com.c29
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_ethtool.c6
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_netdev.c83
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_netdev.h17
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-dev.c23
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-drv.c3
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-i2c.c4
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-mdio.c28
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe.h2
-rw-r--r--drivers/net/ethernet/broadcom/bcm63xx_enet.c6
-rw-r--r--drivers/net/ethernet/broadcom/bgmac-bcma.c6
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c39
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h15
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c13
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h9
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c20
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h6
-rw-r--r--drivers/net/ethernet/broadcom/tg3.c8
-rw-r--r--drivers/net/ethernet/cadence/macb_main.c40
-rw-r--r--drivers/net/ethernet/engleder/tsnep_main.c15
-rw-r--r--drivers/net/ethernet/freescale/dpaa/dpaa_eth.c6
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c9
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc.c4
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_ierb.c6
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c2
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_dtsec.c2
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_memac.c3
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.c10
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c132
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h7
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c71
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c5
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c4
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf.h2
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_ethtool.c10
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_main.c115
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_virtchnl.c10
-rw-r--r--drivers/net/ethernet/intel/ice/ice.h2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.c9
-rw-r--r--drivers/net/ethernet/intel/ice/ice_dcb_lib.c23
-rw-r--r--drivers/net/ethernet/intel/ice/ice_dcb_lib.h4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_devlink.c4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.c28
-rw-r--r--drivers/net/ethernet/intel/ice/ice_gnss.c24
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.c3
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c58
-rw-r--r--drivers/net/ethernet/intel/ice/ice_switch.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_tc_lib.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vf_mbx.c21
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c16
-rw-r--r--drivers/net/ethernet/intel/ice/ice_xsk.c17
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c54
-rw-r--r--drivers/net/ethernet/intel/igc/igc_defines.h2
-rw-r--r--drivers/net/ethernet/intel/igc/igc_main.c25
-rw-r--r--drivers/net/ethernet/intel/igc/igc_ptp.c24
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe.h2
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c28
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c14
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/cgx.c4
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/cgx.h1
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c35
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c19
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c2
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.c30
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.h4
-rw-r--r--drivers/net/ethernet/mediatek/mtk_ppe.c3
-rw-r--r--drivers/net/ethernet/mediatek/mtk_ppe.h1
-rw-r--r--drivers/net/ethernet/mediatek/mtk_sgmii.c46
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/debugfs.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/devlink.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ecpf.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/htb.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c19
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_fs.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c95
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c33
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c18
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c31
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/health.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c29
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c42
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c18
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c26
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c37
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/qos.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/qos.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sriov.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c24
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c2
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c6
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_main.c13
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_port.c2
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c3
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c3
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_main.c2
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c4
-rw-r--r--drivers/net/ethernet/microsoft/mana/gdma_main.c46
-rw-r--r--drivers/net/ethernet/mscc/ocelot_flower.c24
-rw-r--r--drivers/net/ethernet/mscc/ocelot_ptp.c8
-rw-r--r--drivers/net/ethernet/netronome/nfp/crypto/ipsec.c39
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c8
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net.h20
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_common.c65
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h1
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c194
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_port.h12
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_dev.c9
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_dev.h12
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.c68
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.h2
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_main.c29
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_txrx.c87
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_debug.c28
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_fp.c7
-rw-r--r--drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c8
-rw-r--r--drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h10
-rw-r--r--drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c8
-rw-r--r--drivers/net/ethernet/realtek/r8169_main.c63
-rw-r--r--drivers/net/ethernet/renesas/ravb_main.c10
-rw-r--r--drivers/net/ethernet/renesas/rswitch.c32
-rw-r--r--drivers/net/ethernet/renesas/rswitch.h12
-rw-r--r--drivers/net/ethernet/sfc/efx.c5
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c26
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac5.c17
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c8
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c8
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c5
-rw-r--r--drivers/net/ethernet/ti/am65-cpsw-nuss.c12
-rw-r--r--drivers/net/ethernet/ti/am65-cpsw-nuss.h1
-rw-r--r--drivers/net/hyperv/netvsc.c11
-rw-r--r--drivers/net/ipa/data/ipa_data-v4.7.c2
-rw-r--r--drivers/net/ipa/ipa_interrupt.c13
-rw-r--r--drivers/net/ipa/ipa_interrupt.h16
-rw-r--r--drivers/net/ipa/ipa_power.c17
-rw-r--r--drivers/net/mdio/mdio-mux-meson-g12a.c23
-rw-r--r--drivers/net/phy/dp83822.c6
-rw-r--r--drivers/net/phy/mdio_bus.c7
-rw-r--r--drivers/net/phy/meson-gxl.c4
-rw-r--r--drivers/net/phy/phy_device.c2
-rw-r--r--drivers/net/phy/phylink.c5
-rw-r--r--drivers/net/phy/xilinx_gmii2rgmii.c1
-rw-r--r--drivers/net/team/team.c2
-rw-r--r--drivers/net/usb/cdc_ether.c6
-rw-r--r--drivers/net/usb/kalmia.c8
-rw-r--r--drivers/net/usb/plusb.c4
-rw-r--r--drivers/net/usb/r8152.c1
-rw-r--r--drivers/net/usb/rndis_host.c3
-rw-r--r--drivers/net/usb/sr9700.c2
-rw-r--r--drivers/net/veth.c5
-rw-r--r--drivers/net/virtio_net.c14
-rw-r--r--drivers/net/vmxnet3/vmxnet3_drv.c58
-rw-r--r--drivers/net/vrf.c6
-rw-r--r--drivers/net/vxlan/vxlan_core.c19
-rw-r--r--drivers/net/wan/fsl_ucc_hdlc.c6
-rw-r--r--drivers/net/wireless/ath/ath9k/htc.h14
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c37
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c5
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/acpi.c5
-rw-r--r--drivers/net/wireless/mediatek/mt76/dma.c131
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7915/mmio.c7
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7996/Kconfig1
-rw-r--r--drivers/net/wireless/mediatek/mt76/tx.c7
-rw-r--r--drivers/net/wireless/rndis_wlan.c19
-rw-r--r--drivers/net/wireless/ti/Makefile3
-rw-r--r--drivers/net/wwan/t7xx/t7xx_hif_dpmaif.c11
-rw-r--r--drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c29
-rw-r--r--drivers/net/wwan/t7xx/t7xx_netdev.c16
-rw-r--r--drivers/net/wwan/t7xx/t7xx_pci.c2
-rw-r--r--drivers/net/xen-netback/xenbus.c3
-rw-r--r--drivers/net/xen-netfront.c4
-rw-r--r--drivers/nfc/pn533/usb.c44
-rw-r--r--drivers/nvdimm/Kconfig19
-rw-r--r--drivers/nvdimm/nd.h2
-rw-r--r--drivers/nvdimm/pfn_devs.c42
-rw-r--r--drivers/nvme/host/apple.c26
-rw-r--r--drivers/nvme/host/auth.c14
-rw-r--r--drivers/nvme/host/core.c7
-rw-r--r--drivers/nvme/host/fc.c18
-rw-r--r--drivers/nvme/host/ioctl.c110
-rw-r--r--drivers/nvme/host/multipath.c2
-rw-r--r--drivers/nvme/host/pci.c43
-rw-r--r--drivers/nvme/target/fc.c4
-rw-r--r--drivers/nvmem/brcm_nvram.c3
-rw-r--r--drivers/nvmem/core.c60
-rw-r--r--drivers/nvmem/qcom-spmi-sdam.c1
-rw-r--r--drivers/nvmem/sunxi_sid.c15
-rw-r--r--drivers/of/address.c21
-rw-r--r--drivers/of/fdt.c66
-rw-r--r--drivers/of/of_reserved_mem.c3
-rw-r--r--drivers/of/platform.c12
-rw-r--r--drivers/parisc/pdc_stable.c9
-rw-r--r--drivers/pci/controller/dwc/Kconfig2
-rw-r--r--drivers/pci/pci.c7
-rw-r--r--drivers/pci/pci.h4
-rw-r--r--drivers/pci/pcie/aspm.c111
-rw-r--r--drivers/pci/xen-pcifront.c4
-rw-r--r--drivers/perf/arm-cmn.c7
-rw-r--r--drivers/perf/arm_pmu.c8
-rw-r--r--drivers/phy/freescale/phy-fsl-imx8m-pcie.c2
-rw-r--r--drivers/phy/phy-can-transceiver.c5
-rw-r--r--drivers/phy/qualcomm/phy-qcom-usb-hs-28nm.c13
-rw-r--r--drivers/phy/renesas/r8a779f0-ether-serdes.c2
-rw-r--r--drivers/phy/rockchip/phy-rockchip-inno-usb2.c4
-rw-r--r--drivers/phy/sunplus/phy-sunplus-usb2.c3
-rw-r--r--drivers/phy/ti/Kconfig4
-rw-r--r--drivers/pinctrl/aspeed/pinctrl-aspeed.c13
-rw-r--r--drivers/pinctrl/intel/pinctrl-intel.c16
-rw-r--r--drivers/pinctrl/mediatek/pinctrl-mt8195.c4
-rw-r--r--drivers/pinctrl/nomadik/pinctrl-ab8500.c3
-rw-r--r--drivers/pinctrl/nomadik/pinctrl-ab8505.c3
-rw-r--r--drivers/pinctrl/nomadik/pinctrl-abx500.c32
-rw-r--r--drivers/pinctrl/nomadik/pinctrl-abx500.h4
-rw-r--r--drivers/pinctrl/nomadik/pinctrl-nomadik-db8500.c3
-rw-r--r--drivers/pinctrl/nomadik/pinctrl-nomadik-stn8815.c3
-rw-r--r--drivers/pinctrl/nomadik/pinctrl-nomadik.c32
-rw-r--r--drivers/pinctrl/nomadik/pinctrl-nomadik.h5
-rw-r--r--drivers/pinctrl/pinctrl-amd.c1
-rw-r--r--drivers/pinctrl/pinctrl-rockchip.c31
-rw-r--r--drivers/pinctrl/pinctrl-single.c2
-rw-r--r--drivers/pinctrl/qcom/pinctrl-sm8450-lpass-lpi.c2
-rw-r--r--drivers/pinctrl/sunplus/sppctl.c7
-rw-r--r--drivers/platform/surface/aggregator/controller.c4
-rw-r--r--drivers/platform/surface/aggregator/ssh_request_layer.c14
-rw-r--r--drivers/platform/x86/amd/Kconfig1
-rw-r--r--drivers/platform/x86/amd/pmc.c60
-rw-r--r--drivers/platform/x86/amd/pmf/auto-mode.c9
-rw-r--r--drivers/platform/x86/amd/pmf/cnqf.c14
-rw-r--r--drivers/platform/x86/amd/pmf/core.c32
-rw-r--r--drivers/platform/x86/amd/pmf/pmf.h3
-rw-r--r--drivers/platform/x86/amd/pmf/sps.c28
-rw-r--r--drivers/platform/x86/apple-gmux.c93
-rw-r--r--drivers/platform/x86/asus-nb-wmi.c15
-rw-r--r--drivers/platform/x86/asus-wmi.c24
-rw-r--r--drivers/platform/x86/asus-wmi.h1
-rw-r--r--drivers/platform/x86/dell/dell-wmi-base.c3
-rw-r--r--drivers/platform/x86/dell/dell-wmi-privacy.c41
-rw-r--r--drivers/platform/x86/gigabyte-wmi.c1
-rw-r--r--drivers/platform/x86/hp/hp-wmi.c6
-rw-r--r--drivers/platform/x86/ideapad-laptop.c6
-rw-r--r--drivers/platform/x86/intel/int3472/clk_and_regulator.c3
-rw-r--r--drivers/platform/x86/intel/int3472/discrete.c4
-rw-r--r--drivers/platform/x86/intel/pmc/core.c1
-rw-r--r--drivers/platform/x86/intel/vsec.c9
-rw-r--r--drivers/platform/x86/simatic-ipc.c3
-rw-r--r--drivers/platform/x86/sony-laptop.c21
-rw-r--r--drivers/platform/x86/thinkpad_acpi.c34
-rw-r--r--drivers/platform/x86/touchscreen_dmi.c34
-rw-r--r--drivers/regulator/da9211-regulator.c11
-rw-r--r--drivers/regulator/qcom-rpmh-regulator.c2
-rw-r--r--drivers/reset/Kconfig2
-rw-r--r--drivers/reset/reset-uniphier-glue.c4
-rw-r--r--drivers/rtc/rtc-efi.c48
-rw-r--r--drivers/rtc/rtc-sunplus.c4
-rw-r--r--drivers/s390/block/dcssblk.c2
-rw-r--r--drivers/s390/net/qeth_core_sys.c12
-rw-r--r--drivers/scsi/device_handler/scsi_dh_alua.c5
-rw-r--r--drivers/scsi/hisi_sas/hisi_sas_main.c6
-rw-r--r--drivers/scsi/hpsa.c2
-rw-r--r--drivers/scsi/iscsi_tcp.c20
-rw-r--r--drivers/scsi/libiscsi.c38
-rw-r--r--drivers/scsi/libsas/sas_ata.c3
-rw-r--r--drivers/scsi/mpi3mr/Makefile2
-rw-r--r--drivers/scsi/mpi3mr/mpi3mr_fw.c3
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_base.c3
-rw-r--r--drivers/scsi/scsi.c2
-rw-r--r--drivers/scsi/scsi_debug.c2
-rw-r--r--drivers/scsi/scsi_error.c5
-rw-r--r--drivers/scsi/scsi_scan.c7
-rw-r--r--drivers/scsi/scsi_sysfs.c2
-rw-r--r--drivers/scsi/scsi_transport_iscsi.c50
-rw-r--r--drivers/scsi/storvsc_drv.c3
-rw-r--r--drivers/scsi/xen-scsifront.c4
-rw-r--r--drivers/soc/imx/imx8mp-blk-ctrl.c7
-rw-r--r--drivers/soc/imx/soc-imx8m.c4
-rw-r--r--drivers/soc/qcom/apr.c3
-rw-r--r--drivers/soc/qcom/cpr.c6
-rw-r--r--drivers/spi/spi-cadence-xspi.c5
-rw-r--r--drivers/spi/spi-dw-core.c2
-rw-r--r--drivers/spi/spi-mt65xx.c12
-rw-r--r--drivers/spi/spi.c23
-rw-r--r--drivers/spi/spidev.c56
-rw-r--r--drivers/staging/vc04_services/include/linux/raspberrypi/vchiq.h2
-rw-r--r--drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h4
-rw-r--r--drivers/target/target_core_tmr.c4
-rw-r--r--drivers/tee/tee_shm.c37
-rw-r--r--drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c28
-rw-r--r--drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.h1
-rw-r--r--drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c4
-rw-r--r--drivers/thermal/thermal_core.c13
-rw-r--r--drivers/thunderbolt/retimer.c20
-rw-r--r--drivers/thunderbolt/tb.c20
-rw-r--r--drivers/thunderbolt/tunnel.c2
-rw-r--r--drivers/thunderbolt/xdomain.c17
-rw-r--r--drivers/tty/hvc/hvc_xen.c50
-rw-r--r--drivers/tty/serial/8250/8250_dma.c21
-rw-r--r--drivers/tty/serial/8250/8250_exar.c14
-rw-r--r--drivers/tty/serial/amba-pl011.c8
-rw-r--r--drivers/tty/serial/atmel_serial.c8
-rw-r--r--drivers/tty/serial/kgdboc.c20
-rw-r--r--drivers/tty/serial/pch_uart.c2
-rw-r--r--drivers/tty/serial/qcom_geni_serial.c32
-rw-r--r--drivers/tty/serial/serial_core.c5
-rw-r--r--drivers/tty/serial/stm32-usart.c33
-rw-r--r--drivers/tty/vt/vc_screen.c9
-rw-r--r--drivers/ufs/core/ufshcd.c55
-rw-r--r--drivers/usb/cdns3/cdns3-gadget.c12
-rw-r--r--drivers/usb/chipidea/core.c4
-rw-r--r--drivers/usb/common/ulpi.c2
-rw-r--r--drivers/usb/core/hub.c13
-rw-r--r--drivers/usb/core/quirks.c3
-rw-r--r--drivers/usb/core/usb-acpi.c65
-rw-r--r--drivers/usb/dwc3/Kconfig2
-rw-r--r--drivers/usb/dwc3/dwc3-qcom.c2
-rw-r--r--drivers/usb/dwc3/dwc3-xilinx.c1
-rw-r--r--drivers/usb/dwc3/gadget.c5
-rw-r--r--drivers/usb/fotg210/fotg210-core.c5
-rw-r--r--drivers/usb/fotg210/fotg210-udc.c3
-rw-r--r--drivers/usb/gadget/configfs.c12
-rw-r--r--drivers/usb/gadget/function/f_fs.c9
-rw-r--r--drivers/usb/gadget/function/f_ncm.c4
-rw-r--r--drivers/usb/gadget/function/f_uac2.c1
-rw-r--r--drivers/usb/gadget/function/u_ether.c4
-rw-r--r--drivers/usb/gadget/legacy/inode.c28
-rw-r--r--drivers/usb/gadget/legacy/webcam.c3
-rw-r--r--drivers/usb/gadget/udc/bcm63xx_udc.c1
-rw-r--r--drivers/usb/gadget/udc/fsl_qe_udc.c1
-rw-r--r--drivers/usb/gadget/udc/fsl_udc_core.c1
-rw-r--r--drivers/usb/gadget/udc/fusb300_udc.c1
-rw-r--r--drivers/usb/gadget/udc/goku_udc.c1
-rw-r--r--drivers/usb/gadget/udc/gr_udc.c1
-rw-r--r--drivers/usb/gadget/udc/m66592-udc.c1
-rw-r--r--drivers/usb/gadget/udc/max3420_udc.c1
-rw-r--r--drivers/usb/gadget/udc/mv_u3d_core.c1
-rw-r--r--drivers/usb/gadget/udc/mv_udc_core.c1
-rw-r--r--drivers/usb/gadget/udc/net2272.c1
-rw-r--r--drivers/usb/gadget/udc/net2280.c1
-rw-r--r--drivers/usb/gadget/udc/omap_udc.c1
-rw-r--r--drivers/usb/gadget/udc/pch_udc.c1
-rw-r--r--drivers/usb/gadget/udc/snps_udc_core.c1
-rw-r--r--drivers/usb/host/ehci-fsl.c2
-rw-r--r--drivers/usb/host/xen-hcd.c4
-rw-r--r--drivers/usb/host/xhci-pci.c45
-rw-r--r--drivers/usb/host/xhci-ring.c5
-rw-r--r--drivers/usb/host/xhci.c18
-rw-r--r--drivers/usb/host/xhci.h5
-rw-r--r--drivers/usb/misc/iowarrior.c2
-rw-r--r--drivers/usb/misc/onboard_usb_hub.c18
-rw-r--r--drivers/usb/musb/omap2430.c4
-rw-r--r--drivers/usb/serial/cp210x.c1
-rw-r--r--drivers/usb/serial/option.c17
-rw-r--r--drivers/usb/storage/uas-detect.h13
-rw-r--r--drivers/usb/storage/unusual_uas.h7
-rw-r--r--drivers/usb/typec/altmodes/displayport.c30
-rw-r--r--drivers/usb/typec/tcpm/tcpm.c7
-rw-r--r--drivers/usb/typec/ucsi/ucsi.c33
-rw-r--r--drivers/usb/typec/ucsi/ucsi.h1
-rw-r--r--drivers/vdpa/ifcvf/ifcvf_main.c2
-rw-r--r--drivers/vdpa/mlx5/core/mlx5_vdpa.h5
-rw-r--r--drivers/vdpa/mlx5/core/mr.c46
-rw-r--r--drivers/vdpa/mlx5/net/mlx5_vnet.c78
-rw-r--r--drivers/vdpa/vdpa.c11
-rw-r--r--drivers/vdpa/vdpa_sim/vdpa_sim.c7
-rw-r--r--drivers/vdpa/vdpa_sim/vdpa_sim_blk.c4
-rw-r--r--drivers/vdpa/vdpa_sim/vdpa_sim_net.c7
-rw-r--r--drivers/vdpa/vdpa_user/vduse_dev.c3
-rw-r--r--drivers/vdpa/virtio_pci/vp_vdpa.c2
-rw-r--r--drivers/vfio/vfio_iommu_type1.c31
-rw-r--r--drivers/vhost/net.c3
-rw-r--r--drivers/vhost/scsi.c21
-rw-r--r--drivers/vhost/vdpa.c52
-rw-r--r--drivers/vhost/vhost.c7
-rw-r--r--drivers/vhost/vhost.h1
-rw-r--r--drivers/vhost/vringh.c5
-rw-r--r--drivers/vhost/vsock.c9
-rw-r--r--drivers/video/fbdev/Kconfig4
-rw-r--r--drivers/video/fbdev/atmel_lcdfb.c22
-rw-r--r--drivers/video/fbdev/aty/aty128fb.c6
-rw-r--r--drivers/video/fbdev/aty/atyfb_base.c11
-rw-r--r--drivers/video/fbdev/aty/radeon_backlight.c6
-rw-r--r--drivers/video/fbdev/core/fb_defio.c10
-rw-r--r--drivers/video/fbdev/core/fbcon.c7
-rw-r--r--drivers/video/fbdev/core/fbmem.c4
-rw-r--r--drivers/video/fbdev/core/fbmon.c2
-rw-r--r--drivers/video/fbdev/matrox/matroxfb_base.c4
-rw-r--r--drivers/video/fbdev/mx3fb.c7
-rw-r--r--drivers/video/fbdev/nvidia/nv_backlight.c8
-rw-r--r--drivers/video/fbdev/nvidia/nvidia.c81
-rw-r--r--drivers/video/fbdev/omap/omapfb_main.c5
-rw-r--r--drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c8
-rw-r--r--drivers/video/fbdev/omap2/omapfb/dss/display-sysfs.c7
-rw-r--r--drivers/video/fbdev/omap2/omapfb/dss/dsi.c28
-rw-r--r--drivers/video/fbdev/omap2/omapfb/dss/manager-sysfs.c7
-rw-r--r--drivers/video/fbdev/omap2/omapfb/dss/overlay-sysfs.c3
-rw-r--r--drivers/video/fbdev/omap2/omapfb/omapfb-sysfs.c3
-rw-r--r--drivers/video/fbdev/riva/fbdev.c8
-rw-r--r--drivers/video/fbdev/xen-fbfront.c6
-rw-r--r--drivers/virtio/virtio.c12
-rw-r--r--drivers/virtio/virtio_pci_modern.c4
-rw-r--r--drivers/virtio/virtio_ring.c2
-rw-r--r--drivers/w1/w1.c6
-rw-r--r--drivers/w1/w1_int.c5
-rw-r--r--drivers/watchdog/diag288_wdt.c15
-rw-r--r--drivers/xen/pvcalls-back.c3
-rw-r--r--drivers/xen/pvcalls-front.c7
-rw-r--r--drivers/xen/xen-pciback/xenbus.c4
-rw-r--r--drivers/xen/xen-scsiback.c4
-rw-r--r--fs/9p/acl.c10
-rw-r--r--fs/9p/acl.h4
-rw-r--r--fs/9p/v9fs.h2
-rw-r--r--fs/9p/v9fs_vfs.h2
-rw-r--r--fs/9p/vfs_file.c1
-rw-r--r--fs/9p/vfs_inode.c38
-rw-r--r--fs/9p/vfs_inode_dotl.c32
-rw-r--r--fs/9p/xattr.c2
-rw-r--r--fs/Makefile2
-rw-r--r--fs/adfs/adfs.h2
-rw-r--r--fs/adfs/inode.c4
-rw-r--r--fs/affs/affs.h10
-rw-r--r--fs/affs/file.c2
-rw-r--r--fs/affs/inode.c6
-rw-r--r--fs/affs/namei.c8
-rw-r--r--fs/afs/cmservice.c6
-rw-r--r--fs/afs/dir.c16
-rw-r--r--fs/afs/flock.c14
-rw-r--r--fs/afs/inode.c6
-rw-r--r--fs/afs/internal.h7
-rw-r--r--fs/afs/rxrpc.c24
-rw-r--r--fs/afs/security.c2
-rw-r--r--fs/afs/xattr.c4
-rw-r--r--fs/aio.c4
-rw-r--r--fs/attr.c130
-rw-r--r--fs/autofs/root.c14
-rw-r--r--fs/bad_inode.c20
-rw-r--r--fs/bfs/dir.c6
-rw-r--r--fs/binfmt_elf.c4
-rw-r--r--fs/binfmt_elf_fdpic.c4
-rw-r--r--fs/btrfs/Makefile6
-rw-r--r--fs/btrfs/acl.c4
-rw-r--r--fs/btrfs/acl.h2
-rw-r--r--fs/btrfs/backref.c37
-rw-r--r--fs/btrfs/bio.c568
-rw-r--r--fs/btrfs/bio.h67
-rw-r--r--fs/btrfs/block-group.c273
-rw-r--r--fs/btrfs/block-group.h24
-rw-r--r--fs/btrfs/btrfs_inode.h24
-rw-r--r--fs/btrfs/compression.c276
-rw-r--r--fs/btrfs/compression.h3
-rw-r--r--fs/btrfs/ctree.c62
-rw-r--r--fs/btrfs/ctree.h15
-rw-r--r--fs/btrfs/defrag.c10
-rw-r--r--fs/btrfs/delayed-ref.c24
-rw-r--r--fs/btrfs/delayed-ref.h2
-rw-r--r--fs/btrfs/discard.c41
-rw-r--r--fs/btrfs/disk-io.c245
-rw-r--r--fs/btrfs/disk-io.h16
-rw-r--r--fs/btrfs/extent-io-tree.c12
-rw-r--r--fs/btrfs/extent-io-tree.h1
-rw-r--r--fs/btrfs/extent-tree.c188
-rw-r--r--fs/btrfs/extent-tree.h81
-rw-r--r--fs/btrfs/extent_io.c604
-rw-r--r--fs/btrfs/extent_io.h36
-rw-r--r--fs/btrfs/file-item.c72
-rw-r--r--fs/btrfs/file-item.h8
-rw-r--r--fs/btrfs/file.c17
-rw-r--r--fs/btrfs/free-space-tree.c2
-rw-r--r--fs/btrfs/fs.c4
-rw-r--r--fs/btrfs/fs.h17
-rw-r--r--fs/btrfs/inode.c701
-rw-r--r--fs/btrfs/ioctl.c72
-rw-r--r--fs/btrfs/ioctl.h2
-rw-r--r--fs/btrfs/lru_cache.c166
-rw-r--r--fs/btrfs/lru_cache.h80
-rw-r--r--fs/btrfs/lzo.c2
-rw-r--r--fs/btrfs/messages.c30
-rw-r--r--fs/btrfs/messages.h34
-rw-r--r--fs/btrfs/ordered-data.c25
-rw-r--r--fs/btrfs/ordered-data.h3
-rw-r--r--fs/btrfs/qgroup.c42
-rw-r--r--fs/btrfs/raid56.c350
-rw-r--r--fs/btrfs/raid56.h4
-rw-r--r--fs/btrfs/relocation.c2
-rw-r--r--fs/btrfs/scrub.c51
-rw-r--r--fs/btrfs/send.c690
-rw-r--r--fs/btrfs/space-info.c3
-rw-r--r--fs/btrfs/super.c6
-rw-r--r--fs/btrfs/sysfs.c41
-rw-r--r--fs/btrfs/sysfs.h3
-rw-r--r--fs/btrfs/tests/btrfs-tests.c2
-rw-r--r--fs/btrfs/tests/extent-map-tests.c2
-rw-r--r--fs/btrfs/transaction.c34
-rw-r--r--fs/btrfs/transaction.h31
-rw-r--r--fs/btrfs/tree-log.c136
-rw-r--r--fs/btrfs/tree-log.h11
-rw-r--r--fs/btrfs/verity.c19
-rw-r--r--fs/btrfs/volumes.c199
-rw-r--r--fs/btrfs/volumes.h18
-rw-r--r--fs/btrfs/xattr.c4
-rw-r--r--fs/btrfs/zlib.c2
-rw-r--r--fs/btrfs/zoned.c148
-rw-r--r--fs/btrfs/zoned.h20
-rw-r--r--fs/buffer.c72
-rw-r--r--fs/cachefiles/interface.c4
-rw-r--r--fs/cachefiles/namei.c12
-rw-r--r--fs/cachefiles/xattr.c10
-rw-r--r--fs/ceph/acl.c4
-rw-r--r--fs/ceph/addr.c17
-rw-r--r--fs/ceph/caps.c19
-rw-r--r--fs/ceph/dir.c12
-rw-r--r--fs/ceph/file.c3
-rw-r--r--fs/ceph/inode.c30
-rw-r--r--fs/ceph/locks.c25
-rw-r--r--fs/ceph/mds_client.c36
-rw-r--r--fs/ceph/snap.c36
-rw-r--r--fs/ceph/super.h20
-rw-r--r--fs/ceph/xattr.c2
-rw-r--r--fs/cifs/cifsacl.c4
-rw-r--r--fs/cifs/cifsencrypt.c1
-rw-r--r--fs/cifs/cifsfs.c5
-rw-r--r--fs/cifs/cifsfs.h14
-rw-r--r--fs/cifs/cifsglob.h1
-rw-r--r--fs/cifs/cifsproto.h4
-rw-r--r--fs/cifs/cifssmb.c1
-rw-r--r--fs/cifs/connect.c9
-rw-r--r--fs/cifs/dfs.c25
-rw-r--r--fs/cifs/dfs_cache.c244
-rw-r--r--fs/cifs/dfs_cache.h3
-rw-r--r--fs/cifs/dir.c4
-rw-r--r--fs/cifs/file.c5
-rw-r--r--fs/cifs/inode.c18
-rw-r--r--fs/cifs/link.c3
-rw-r--r--fs/cifs/misc.c2
-rw-r--r--fs/cifs/sess.c5
-rw-r--r--fs/cifs/smb1ops.c63
-rw-r--r--fs/cifs/smb2file.c1
-rw-r--r--fs/cifs/smb2ops.c12
-rw-r--r--fs/cifs/smb2pdu.c32
-rw-r--r--fs/cifs/smbdirect.c1
-rw-r--r--fs/cifs/xattr.c2
-rw-r--r--fs/coda/coda_linux.h6
-rw-r--r--fs/coda/dir.c10
-rw-r--r--fs/coda/inode.c6
-rw-r--r--fs/coda/pioctl.c4
-rw-r--r--fs/configfs/configfs_internal.h4
-rw-r--r--fs/configfs/dir.c2
-rw-r--r--fs/configfs/inode.c4
-rw-r--r--fs/configfs/symlink.c4
-rw-r--r--fs/coredump.c56
-rw-r--r--fs/crypto/bio.c10
-rw-r--r--fs/crypto/crypto.c28
-rw-r--r--fs/crypto/fscrypt_private.h4
-rw-r--r--fs/crypto/keyring.c26
-rw-r--r--fs/crypto/keysetup.c23
-rw-r--r--fs/crypto/policy.c5
-rw-r--r--fs/dax.c5
-rw-r--r--fs/debugfs/inode.c6
-rw-r--r--fs/dlm/plock.c1
-rw-r--r--fs/ecryptfs/crypto.c2
-rw-r--r--fs/ecryptfs/inode.c70
-rw-r--r--fs/ecryptfs/mmap.c2
-rw-r--r--fs/efivarfs/inode.c4
-rw-r--r--fs/erofs/Kconfig18
-rw-r--r--fs/erofs/data.c23
-rw-r--r--fs/erofs/dir.c17
-rw-r--r--fs/erofs/fscache.c146
-rw-r--r--fs/erofs/inode.c46
-rw-r--r--fs/erofs/internal.h148
-rw-r--r--fs/erofs/namei.c18
-rw-r--r--fs/erofs/super.c16
-rw-r--r--fs/erofs/sysfs.c6
-rw-r--r--fs/erofs/tagptr.h107
-rw-r--r--fs/erofs/xattr.c20
-rw-r--r--fs/erofs/zdata.c436
-rw-r--r--fs/erofs/zdata.h178
-rw-r--r--fs/erofs/zmap.c263
-rw-r--r--fs/exec.c14
-rw-r--r--fs/exfat/exfat_fs.h4
-rw-r--r--fs/exfat/file.c10
-rw-r--r--fs/exfat/namei.c6
-rw-r--r--fs/exportfs/expfs.c4
-rw-r--r--fs/ext2/acl.c4
-rw-r--r--fs/ext2/acl.h2
-rw-r--r--fs/ext2/dir.c17
-rw-r--r--fs/ext2/ext2.h11
-rw-r--r--fs/ext2/ialloc.c2
-rw-r--r--fs/ext2/inode.c20
-rw-r--r--fs/ext2/ioctl.c6
-rw-r--r--fs/ext2/namei.c33
-rw-r--r--fs/ext2/xattr_security.c2
-rw-r--r--fs/ext2/xattr_trusted.c2
-rw-r--r--fs/ext2/xattr_user.c2
-rw-r--r--fs/ext4/acl.c4
-rw-r--r--fs/ext4/acl.h2
-rw-r--r--fs/ext4/ext4.h16
-rw-r--r--fs/ext4/ialloc.c6
-rw-r--r--fs/ext4/inode.c34
-rw-r--r--fs/ext4/ioctl.c20
-rw-r--r--fs/ext4/namei.c32
-rw-r--r--fs/ext4/readpage.c3
-rw-r--r--fs/ext4/super.c18
-rw-r--r--fs/ext4/symlink.c4
-rw-r--r--fs/ext4/verity.c6
-rw-r--r--fs/ext4/xattr.c40
-rw-r--r--fs/ext4/xattr_hurd.c2
-rw-r--r--fs/ext4/xattr_security.c2
-rw-r--r--fs/ext4/xattr_trusted.c2
-rw-r--r--fs/ext4/xattr_user.c2
-rw-r--r--fs/f2fs/acl.c14
-rw-r--r--fs/f2fs/acl.h2
-rw-r--r--fs/f2fs/data.c5
-rw-r--r--fs/f2fs/extent_cache.c34
-rw-r--r--fs/f2fs/f2fs.h8
-rw-r--r--fs/f2fs/file.c52
-rw-r--r--fs/f2fs/namei.c50
-rw-r--r--fs/f2fs/recovery.c6
-rw-r--r--fs/f2fs/segment.c13
-rw-r--r--fs/f2fs/super.c6
-rw-r--r--fs/f2fs/verity.c6
-rw-r--r--fs/f2fs/xattr.c6
-rw-r--r--fs/fat/fat.h4
-rw-r--r--fs/fat/file.c26
-rw-r--r--fs/fat/namei_msdos.c6
-rw-r--r--fs/fat/namei_vfat.c6
-rw-r--r--fs/fcntl.c3
-rw-r--r--fs/file_table.c1
-rw-r--r--fs/freevxfs/Kconfig2
-rw-r--r--fs/fscache/volume.c14
-rw-r--r--fs/fuse/acl.c72
-rw-r--r--fs/fuse/dir.c38
-rw-r--r--fs/fuse/file.c4
-rw-r--r--fs/fuse/fuse_i.h10
-rw-r--r--fs/fuse/inode.c21
-rw-r--r--fs/fuse/ioctl.c2
-rw-r--r--fs/fuse/xattr.c53
-rw-r--r--fs/gfs2/acl.c4
-rw-r--r--fs/gfs2/acl.h2
-rw-r--r--fs/gfs2/file.c5
-rw-r--r--fs/gfs2/inode.c58
-rw-r--r--fs/gfs2/inode.h4
-rw-r--r--fs/gfs2/log.c11
-rw-r--r--fs/gfs2/xattr.c2
-rw-r--r--fs/hfs/attr.c2
-rw-r--r--fs/hfs/dir.c6
-rw-r--r--fs/hfs/hfs_fs.h2
-rw-r--r--fs/hfs/inode.c21
-rw-r--r--fs/hfsplus/dir.c14
-rw-r--r--fs/hfsplus/hfsplus_fs.h4
-rw-r--r--fs/hfsplus/inode.c14
-rw-r--r--fs/hfsplus/xattr.c2
-rw-r--r--fs/hfsplus/xattr_security.c2
-rw-r--r--fs/hfsplus/xattr_trusted.c2
-rw-r--r--fs/hfsplus/xattr_user.c2
-rw-r--r--fs/hostfs/hostfs_kern.c20
-rw-r--r--fs/hpfs/hpfs_fn.h2
-rw-r--r--fs/hpfs/inode.c6
-rw-r--r--fs/hpfs/namei.c10
-rw-r--r--fs/hugetlbfs/inode.c22
-rw-r--r--fs/init.c14
-rw-r--r--fs/inode.c58
-rw-r--r--fs/internal.h15
-rw-r--r--fs/ioctl.c16
-rw-r--r--fs/iomap/direct-io.c10
-rw-r--r--fs/jffs2/acl.c4
-rw-r--r--fs/jffs2/acl.h2
-rw-r--r--fs/jffs2/dir.c20
-rw-r--r--fs/jffs2/fs.c6
-rw-r--r--fs/jffs2/os-linux.h2
-rw-r--r--fs/jffs2/security.c2
-rw-r--r--fs/jffs2/xattr_trusted.c2
-rw-r--r--fs/jffs2/xattr_user.c2
-rw-r--r--fs/jfs/acl.c4
-rw-r--r--fs/jfs/file.c12
-rw-r--r--fs/jfs/ioctl.c2
-rw-r--r--fs/jfs/jfs_acl.h2
-rw-r--r--fs/jfs/jfs_inode.c2
-rw-r--r--fs/jfs/jfs_inode.h4
-rw-r--r--fs/jfs/namei.c10
-rw-r--r--fs/jfs/xattr.c4
-rw-r--r--fs/kernfs/dir.c4
-rw-r--r--fs/kernfs/inode.c18
-rw-r--r--fs/kernfs/kernfs-internal.h6
-rw-r--r--fs/ksmbd/auth.c3
-rw-r--r--fs/ksmbd/connection.c24
-rw-r--r--fs/ksmbd/ksmbd_netlink.h3
-rw-r--r--fs/ksmbd/ndr.c14
-rw-r--r--fs/ksmbd/ndr.h2
-rw-r--r--fs/ksmbd/oplock.c6
-rw-r--r--fs/ksmbd/server.h1
-rw-r--r--fs/ksmbd/smb2pdu.c132
-rw-r--r--fs/ksmbd/smb2pdu.h5
-rw-r--r--fs/ksmbd/smb_common.c4
-rw-r--r--fs/ksmbd/smbacl.c72
-rw-r--r--fs/ksmbd/smbacl.h12
-rw-r--r--fs/ksmbd/transport_ipc.c3
-rw-r--r--fs/ksmbd/transport_tcp.c22
-rw-r--r--fs/ksmbd/vfs.c173
-rw-r--r--fs/ksmbd/vfs.h36
-rw-r--r--fs/ksmbd/vfs_cache.c5
-rw-r--r--fs/libfs.c56
-rw-r--r--fs/lockd/clntlock.c2
-rw-r--r--fs/lockd/clntproc.c3
-rw-r--r--fs/lockd/netns.h1
-rw-r--r--fs/locks.c29
-rw-r--r--fs/minix/bitmap.c2
-rw-r--r--fs/minix/file.c6
-rw-r--r--fs/minix/inode.c4
-rw-r--r--fs/minix/minix.h2
-rw-r--r--fs/minix/namei.c14
-rw-r--r--fs/mnt_idmapping.c273
-rw-r--r--fs/mpage.c2
-rw-r--r--fs/namei.c451
-rw-r--r--fs/namespace.c118
-rw-r--r--fs/nfs/Kconfig8
-rw-r--r--fs/nfs/dir.c21
-rw-r--r--fs/nfs/export.c7
-rw-r--r--fs/nfs/file.c1
-rw-r--r--fs/nfs/filelayout/filelayout.c8
-rw-r--r--fs/nfs/inode.c22
-rw-r--r--fs/nfs/internal.h10
-rw-r--r--fs/nfs/namespace.c10
-rw-r--r--fs/nfs/nfs3_fs.h2
-rw-r--r--fs/nfs/nfs3acl.c2
-rw-r--r--fs/nfs/nfs4_fs.h1
-rw-r--r--fs/nfs/nfs4proc.c10
-rw-r--r--fs/nfs/pagelist.c1
-rw-r--r--fs/nfs/write.c1
-rw-r--r--fs/nfs_common/grace.c1
-rw-r--r--fs/nfsd/filecache.c101
-rw-r--r--fs/nfsd/filecache.h5
-rw-r--r--fs/nfsd/netns.h3
-rw-r--r--fs/nfsd/nfs2acl.c4
-rw-r--r--fs/nfsd/nfs3acl.c4
-rw-r--r--fs/nfsd/nfs3proc.c2
-rw-r--r--fs/nfsd/nfs4proc.c8
-rw-r--r--fs/nfsd/nfs4recover.c6
-rw-r--r--fs/nfsd/nfs4state.c52
-rw-r--r--fs/nfsd/nfs4xdr.c17
-rw-r--r--fs/nfsd/nfsctl.c7
-rw-r--r--fs/nfsd/nfsd.h6
-rw-r--r--fs/nfsd/nfsfh.c44
-rw-r--r--fs/nfsd/nfsfh.h29
-rw-r--r--fs/nfsd/nfsproc.c6
-rw-r--r--fs/nfsd/nfssvc.c2
-rw-r--r--fs/nfsd/trace.h52
-rw-r--r--fs/nfsd/vfs.c41
-rw-r--r--fs/nfsd/vfs.h7
-rw-r--r--fs/nilfs2/btree.c15
-rw-r--r--fs/nilfs2/inode.c12
-rw-r--r--fs/nilfs2/ioctl.c9
-rw-r--r--fs/nilfs2/namei.c10
-rw-r--r--fs/nilfs2/nilfs.h6
-rw-r--r--fs/nilfs2/super.c9
-rw-r--r--fs/nilfs2/the_nilfs.c8
-rw-r--r--fs/notify/fanotify/fanotify.c8
-rw-r--r--fs/notify/fanotify/fanotify.h6
-rw-r--r--fs/notify/fanotify/fanotify_user.c88
-rw-r--r--fs/ntfs/inode.c6
-rw-r--r--fs/ntfs/inode.h2
-rw-r--r--fs/ntfs3/file.c16
-rw-r--r--fs/ntfs3/inode.c6
-rw-r--r--fs/ntfs3/namei.c24
-rw-r--r--fs/ntfs3/ntfs_fs.h16
-rw-r--r--fs/ntfs3/xattr.c24
-rw-r--r--fs/ocfs2/acl.c4
-rw-r--r--fs/ocfs2/acl.h2
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c14
-rw-r--r--fs/ocfs2/file.c20
-rw-r--r--fs/ocfs2/file.h6
-rw-r--r--fs/ocfs2/ioctl.c2
-rw-r--r--fs/ocfs2/ioctl.h2
-rw-r--r--fs/ocfs2/locks.c1
-rw-r--r--fs/ocfs2/namei.c18
-rw-r--r--fs/ocfs2/refcounttree.c4
-rw-r--r--fs/ocfs2/stack_user.c1
-rw-r--r--fs/ocfs2/xattr.c6
-rw-r--r--fs/omfs/dir.c6
-rw-r--r--fs/omfs/file.c6
-rw-r--r--fs/omfs/inode.c2
-rw-r--r--fs/open.c43
-rw-r--r--fs/orangefs/acl.c4
-rw-r--r--fs/orangefs/file.c1
-rw-r--r--fs/orangefs/inode.c18
-rw-r--r--fs/orangefs/namei.c8
-rw-r--r--fs/orangefs/orangefs-kernel.h8
-rw-r--r--fs/orangefs/xattr.c2
-rw-r--r--fs/overlayfs/copy_up.c6
-rw-r--r--fs/overlayfs/dir.c12
-rw-r--r--fs/overlayfs/export.c4
-rw-r--r--fs/overlayfs/file.c8
-rw-r--r--fs/overlayfs/inode.c46
-rw-r--r--fs/overlayfs/namei.c6
-rw-r--r--fs/overlayfs/overlayfs.h55
-rw-r--r--fs/overlayfs/ovl_entry.h4
-rw-r--r--fs/overlayfs/readdir.c4
-rw-r--r--fs/overlayfs/super.c4
-rw-r--r--fs/overlayfs/util.c14
-rw-r--r--fs/posix_acl.c107
-rw-r--r--fs/proc/base.c22
-rw-r--r--fs/proc/fd.c9
-rw-r--r--fs/proc/fd.h2
-rw-r--r--fs/proc/generic.c10
-rw-r--r--fs/proc/internal.h4
-rw-r--r--fs/proc/proc_net.c4
-rw-r--r--fs/proc/proc_sysctl.c12
-rw-r--r--fs/proc/root.c4
-rw-r--r--fs/proc/task_mmu.c4
-rw-r--r--fs/quota/dquot.c10
-rw-r--r--fs/ramfs/file-nommu.c8
-rw-r--r--fs/ramfs/inode.c16
-rw-r--r--fs/reiserfs/acl.h2
-rw-r--r--fs/reiserfs/inode.c10
-rw-r--r--fs/reiserfs/ioctl.c4
-rw-r--r--fs/reiserfs/namei.c12
-rw-r--r--fs/reiserfs/reiserfs.h4
-rw-r--r--fs/reiserfs/xattr.c12
-rw-r--r--fs/reiserfs/xattr.h2
-rw-r--r--fs/reiserfs/xattr_acl.c6
-rw-r--r--fs/reiserfs/xattr_security.c2
-rw-r--r--fs/reiserfs/xattr_trusted.c2
-rw-r--r--fs/reiserfs/xattr_user.c2
-rw-r--r--fs/remap_range.c6
-rw-r--r--fs/squashfs/squashfs_fs.h2
-rw-r--r--fs/squashfs/squashfs_fs_sb.h2
-rw-r--r--fs/squashfs/xattr.h4
-rw-r--r--fs/squashfs/xattr_id.c2
-rw-r--r--fs/stat.c41
-rw-r--r--fs/super.c22
-rw-r--r--fs/sysv/file.c6
-rw-r--r--fs/sysv/ialloc.c2
-rw-r--r--fs/sysv/itree.c4
-rw-r--r--fs/sysv/namei.c12
-rw-r--r--fs/sysv/sysv.h2
-rw-r--r--fs/tracefs/inode.c2
-rw-r--r--fs/ubifs/dir.c18
-rw-r--r--fs/ubifs/file.c8
-rw-r--r--fs/ubifs/ioctl.c2
-rw-r--r--fs/ubifs/ubifs.h8
-rw-r--r--fs/ubifs/xattr.c2
-rw-r--r--fs/udf/balloc.c33
-rw-r--r--fs/udf/dir.c148
-rw-r--r--fs/udf/directory.c579
-rw-r--r--fs/udf/file.c182
-rw-r--r--fs/udf/ialloc.c33
-rw-r--r--fs/udf/inode.c606
-rw-r--r--fs/udf/lowlevel.c7
-rw-r--r--fs/udf/misc.c18
-rw-r--r--fs/udf/namei.c1105
-rw-r--r--fs/udf/partition.c9
-rw-r--r--fs/udf/super.c77
-rw-r--r--fs/udf/symlink.c32
-rw-r--r--fs/udf/truncate.c6
-rw-r--r--fs/udf/udf_i.h3
-rw-r--r--fs/udf/udf_sb.h3
-rw-r--r--fs/udf/udfdecl.h57
-rw-r--r--fs/ufs/ialloc.c2
-rw-r--r--fs/ufs/inode.c6
-rw-r--r--fs/ufs/namei.c10
-rw-r--r--fs/ufs/ufs.h2
-rw-r--r--fs/userfaultfd.c28
-rw-r--r--fs/utimes.c3
-rw-r--r--fs/vboxsf/dir.c8
-rw-r--r--fs/vboxsf/utils.c6
-rw-r--r--fs/vboxsf/vfsmod.h4
-rw-r--r--fs/verity/Kconfig8
-rw-r--r--fs/verity/enable.c271
-rw-r--r--fs/verity/fsverity_private.h24
-rw-r--r--fs/verity/hash_algs.c24
-rw-r--r--fs/verity/init.c1
-rw-r--r--fs/verity/open.c163
-rw-r--r--fs/verity/signature.c2
-rw-r--r--fs/verity/verify.c346
-rw-r--r--fs/xattr.c84
-rw-r--r--fs/xfs/libxfs/xfs_btree.c7
-rw-r--r--fs/xfs/xfs_acl.c4
-rw-r--r--fs/xfs/xfs_acl.h2
-rw-r--r--fs/xfs/xfs_extent_busy.c1
-rw-r--r--fs/xfs/xfs_file.c2
-rw-r--r--fs/xfs/xfs_icache.c10
-rw-r--r--fs/xfs/xfs_inode.c32
-rw-r--r--fs/xfs/xfs_inode.h8
-rw-r--r--fs/xfs/xfs_ioctl.c12
-rw-r--r--fs/xfs/xfs_ioctl.h2
-rw-r--r--fs/xfs/xfs_ioctl32.c2
-rw-r--r--fs/xfs/xfs_iomap.c2
-rw-r--r--fs/xfs/xfs_iops.c85
-rw-r--r--fs/xfs/xfs_iops.h2
-rw-r--r--fs/xfs/xfs_itable.c14
-rw-r--r--fs/xfs/xfs_itable.h2
-rw-r--r--fs/xfs/xfs_linux.h1
-rw-r--r--fs/xfs/xfs_pnfs.c2
-rw-r--r--fs/xfs/xfs_qm.c4
-rw-r--r--fs/xfs/xfs_reflink.c2
-rw-r--r--fs/xfs/xfs_symlink.c8
-rw-r--r--fs/xfs/xfs_symlink.h2
-rw-r--r--fs/xfs/xfs_xattr.c2
-rw-r--r--fs/zonefs/super.c32
-rw-r--r--include/acpi/acpi_bus.h3
-rw-r--r--include/acpi/actbl3.h1
-rw-r--r--include/drm/drm_client.h5
-rw-r--r--include/drm/drm_fb_helper.h12
-rw-r--r--include/drm/drm_plane_helper.h1
-rw-r--r--include/drm/drm_vma_manager.h1
-rw-r--r--include/kunit/test.h6
-rw-r--r--include/kvm/arm_vgic.h2
-rw-r--r--include/linux/apple-gmux.h109
-rw-r--r--include/linux/audit.h9
-rw-r--r--include/linux/bio.h6
-rw-r--r--include/linux/blkdev.h1
-rw-r--r--include/linux/bpf.h2
-rw-r--r--include/linux/capability.h9
-rw-r--r--include/linux/ceph/libceph.h10
-rw-r--r--include/linux/dsa/tag_qca.h4
-rw-r--r--include/linux/efi.h3
-rw-r--r--include/linux/elfcore.h8
-rw-r--r--include/linux/evm.h26
-rw-r--r--include/linux/exportfs.h1
-rw-r--r--include/linux/fanotify.h5
-rw-r--r--include/linux/fb.h1
-rw-r--r--include/linux/fileattr.h2
-rw-r--r--include/linux/filelock.h439
-rw-r--r--include/linux/firmware/xlnx-zynqmp.h8
-rw-r--r--include/linux/fs.h600
-rw-r--r--include/linux/fscrypt.h18
-rw-r--r--include/linux/fsverity.h93
-rw-r--r--include/linux/highmem-internal.h9
-rw-r--r--include/linux/hugetlb.h18
-rw-r--r--include/linux/ima.h22
-rw-r--r--include/linux/io_uring_types.h3
-rw-r--r--include/linux/iomap.h3
-rw-r--r--include/linux/iversion.h60
-rw-r--r--include/linux/key.h8
-rw-r--r--include/linux/lockd/lockd.h4
-rw-r--r--include/linux/lockd/xdr.h1
-rw-r--r--include/linux/lsm_hook_defs.h14
-rw-r--r--include/linux/lsm_hooks.h2
-rw-r--r--include/linux/memcontrol.h5
-rw-r--r--include/linux/mlx5/device.h5
-rw-r--r--include/linux/mlx5/driver.h15
-rw-r--r--include/linux/mlx5/mlx5_ifc.h3
-rw-r--r--include/linux/mm.h20
-rw-r--r--include/linux/mm_inline.h3
-rw-r--r--include/linux/mm_types.h2
-rw-r--r--include/linux/mnt_idmapping.h226
-rw-r--r--include/linux/mount.h2
-rw-r--r--include/linux/mtd/spi-nor.h1
-rw-r--r--include/linux/namei.h6
-rw-r--r--include/linux/netdevice.h2
-rw-r--r--include/linux/netfilter/ipset/ip_set.h2
-rw-r--r--include/linux/nfs_fs.h6
-rw-r--r--include/linux/nvmem-provider.h2
-rw-r--r--include/linux/page_ref.h2
-rw-r--r--include/linux/pci.h12
-rw-r--r--include/linux/perf/arm_pmu.h1
-rw-r--r--include/linux/phy.h5
-rw-r--r--include/linux/pktcdvd.h197
-rw-r--r--include/linux/platform_data/x86/simatic-ipc.h3
-rw-r--r--include/linux/poison.h3
-rw-r--r--include/linux/posix_acl.h24
-rw-r--r--include/linux/quotaops.h10
-rw-r--r--include/linux/security.h46
-rw-r--r--include/linux/shrinker.h5
-rw-r--r--include/linux/soc/ti/omap1-io.h4
-rw-r--r--include/linux/spinlock.h9
-rw-r--r--include/linux/stat.h9
-rw-r--r--include/linux/stmmac.h1
-rw-r--r--include/linux/sunrpc/rpc_pipe_fs.h5
-rw-r--r--include/linux/swap.h3
-rw-r--r--include/linux/tpm.h1
-rw-r--r--include/linux/tpm_eventlog.h4
-rw-r--r--include/linux/trace_events.h1
-rw-r--r--include/linux/usb.h18
-rw-r--r--include/linux/util_macros.h12
-rw-r--r--include/linux/xattr.h20
-rw-r--r--include/net/af_rxrpc.h3
-rw-r--r--include/net/inet_hashtables.h4
-rw-r--r--include/net/inet_timewait_sock.h5
-rw-r--r--include/net/mac80211.h4
-rw-r--r--include/net/mana/gdma.h3
-rw-r--r--include/net/netfilter/nf_tables.h25
-rw-r--r--include/net/sch_generic.h7
-rw-r--r--include/net/sock.h13
-rw-r--r--include/net/tc_wrapper.h4
-rw-r--r--include/scsi/libiscsi.h2
-rw-r--r--include/scsi/scsi_transport_iscsi.h9
-rw-r--r--include/soc/bcm2835/raspberrypi-firmware.h2
-rw-r--r--include/trace/events/btrfs.h129
-rw-r--r--include/trace/events/erofs.h17
-rw-r--r--include/trace/events/rxrpc.h166
-rw-r--r--include/trace/stages/stage4_event_fields.h3
-rw-r--r--include/uapi/drm/virtgpu_drm.h1
-rw-r--r--include/uapi/linux/atmbr2684.h2
-rw-r--r--include/uapi/linux/fanotify.h30
-rw-r--r--include/uapi/linux/ip.h1
-rw-r--r--include/uapi/linux/ipv6.h1
-rw-r--r--include/uapi/linux/netfilter/nf_conntrack_sctp.h3
-rw-r--r--include/uapi/linux/netfilter/nfnetlink_cttimeout.h3
-rw-r--r--include/uapi/linux/pktcdvd.h112
-rw-r--r--include/uapi/linux/psci.h4
-rw-r--r--include/uapi/linux/vdpa.h4
-rw-r--r--include/ufs/ufshcd.h2
-rw-r--r--include/xen/xenbus.h2
-rw-r--r--init/Kconfig10
-rw-r--r--init/Makefile1
-rw-r--r--init/version-timestamp.c1
-rw-r--r--io_uring/fdinfo.c12
-rw-r--r--io_uring/io-wq.c6
-rw-r--r--io_uring/io_uring.c41
-rw-r--r--io_uring/io_uring.h15
-rw-r--r--io_uring/msg_ring.c130
-rw-r--r--io_uring/net.c11
-rw-r--r--io_uring/poll.c52
-rw-r--r--io_uring/rw.c6
-rw-r--r--ipc/mqueue.c6
-rw-r--r--kernel/auditsc.c22
-rw-r--r--kernel/bpf/bpf_lsm.c3
-rw-r--r--kernel/bpf/btf.c4
-rw-r--r--kernel/bpf/hashtab.c4
-rw-r--r--kernel/bpf/inode.c8
-rw-r--r--kernel/bpf/memalloc.c2
-rw-r--r--kernel/bpf/offload.c3
-rw-r--r--kernel/bpf/syscall.c24
-rw-r--r--kernel/bpf/task_iter.c39
-rw-r--r--kernel/bpf/trampoline.c4
-rw-r--r--kernel/bpf/verifier.c56
-rw-r--r--kernel/capability.c10
-rw-r--r--kernel/cgroup/cgroup.c2
-rw-r--r--kernel/cgroup/cpuset.c48
-rw-r--r--kernel/events/core.c39
-rwxr-xr-xkernel/gen_kheaders.sh2
-rw-r--r--kernel/irq/irqdomain.c4
-rw-r--r--kernel/irq/msi.c6
-rw-r--r--kernel/kallsyms_selftest.c21
-rw-r--r--kernel/kcsan/kcsan_test.c7
-rw-r--r--kernel/locking/rtmutex.c5
-rw-r--r--kernel/module/main.c26
-rw-r--r--kernel/printk/printk.c2
-rw-r--r--kernel/sched/core.c82
-rw-r--r--kernel/sched/fair.c48
-rw-r--r--kernel/sched/psi.c7
-rw-r--r--kernel/sys.c2
-rw-r--r--kernel/time/alarmtimer.c33
-rw-r--r--kernel/time/tick-oneshot.c4
-rw-r--r--kernel/time/time.c8
-rw-r--r--kernel/time/timekeeping.c8
-rw-r--r--kernel/trace/Kconfig8
-rw-r--r--kernel/trace/bpf_trace.c6
-rw-r--r--kernel/trace/ftrace.c23
-rw-r--r--kernel/trace/rv/rv.c2
-rw-r--r--kernel/trace/trace.c5
-rw-r--r--kernel/trace/trace.h2
-rw-r--r--kernel/trace/trace_events.c39
-rw-r--r--kernel/trace/trace_events_filter.c8
-rw-r--r--kernel/trace/trace_events_hist.c2
-rw-r--r--kernel/trace/trace_export.c3
-rw-r--r--kernel/trace/trace_osnoise.c5
-rw-r--r--kernel/trace/trace_output.c3
-rw-r--r--kernel/umh.c20
-rw-r--r--lib/Kconfig.debug14
-rw-r--r--lib/Kconfig.kcsan2
-rw-r--r--lib/dec_and_lock.c31
-rw-r--r--lib/kunit/assert.c40
-rw-r--r--lib/kunit/test.c1
-rw-r--r--lib/lockref.c1
-rw-r--r--lib/maple_tree.c22
-rw-r--r--lib/memcpy_kunit.c2
-rw-r--r--lib/nlattr.c3
-rw-r--r--lib/parser.c39
-rw-r--r--lib/scatterlist.c25
-rw-r--r--lib/test_maple_tree.c89
-rw-r--r--lib/win_minmax.c2
-rw-r--r--mm/compaction.c1
-rw-r--r--mm/filemap.c5
-rw-r--r--mm/gup.c2
-rw-r--r--mm/huge_memory.c6
-rw-r--r--mm/hugetlb.c102
-rw-r--r--mm/kasan/common.c3
-rw-r--r--mm/kasan/generic.c7
-rw-r--r--mm/kasan/report.c2
-rw-r--r--mm/kasan/shadow.c12
-rw-r--r--mm/khugepaged.c39
-rw-r--r--mm/kmemleak.c5
-rw-r--r--mm/ksm.c7
-rw-r--r--mm/madvise.c4
-rw-r--r--mm/memblock.c2
-rw-r--r--mm/memcontrol.c67
-rw-r--r--mm/memory.c17
-rw-r--r--mm/mempolicy.c3
-rw-r--r--mm/migrate.c2
-rw-r--r--mm/mincore.c2
-rw-r--r--mm/mmap.c8
-rw-r--r--mm/mprotect.c8
-rw-r--r--mm/mremap.c25
-rw-r--r--mm/nommu.c9
-rw-r--r--mm/page_alloc.c5
-rw-r--r--mm/secretmem.c4
-rw-r--r--mm/shmem.c75
-rw-r--r--mm/shrinker_debug.c13
-rw-r--r--mm/slab.c2
-rw-r--r--mm/swap.c30
-rw-r--r--mm/swapfile.c21
-rw-r--r--mm/vmscan.c15
-rw-r--r--mm/zsmalloc.c237
-rw-r--r--net/9p/trans_xen.c3
-rw-r--r--net/bluetooth/hci_conn.c18
-rw-r--r--net/bluetooth/hci_event.c5
-rw-r--r--net/bluetooth/hci_sync.c19
-rw-r--r--net/bluetooth/iso.c64
-rw-r--r--net/bluetooth/mgmt_util.h2
-rw-r--r--net/bluetooth/rfcomm/sock.c7
-rw-r--r--net/bridge/br_netfilter_hooks.c1
-rw-r--r--net/caif/caif_socket.c1
-rw-r--r--net/caif/cfctrl.c6
-rw-r--r--net/can/isotp.c69
-rw-r--r--net/can/j1939/address-claim.c40
-rw-r--r--net/can/j1939/transport.c4
-rw-r--r--net/can/raw.c47
-rw-r--r--net/core/dev.c10
-rw-r--r--net/core/devlink.c14
-rw-r--r--net/core/filter.c7
-rw-r--r--net/core/gro.c14
-rw-r--r--net/core/neighbour.c18
-rw-r--r--net/core/net_namespace.c12
-rw-r--r--net/core/skbuff.c5
-rw-r--r--net/core/sock.c3
-rw-r--r--net/core/sock_map.c61
-rw-r--r--net/core/stream.c1
-rw-r--r--net/dccp/ipv6.c7
-rw-r--r--net/ethtool/ioctl.c107
-rw-r--r--net/ethtool/rss.c11
-rw-r--r--net/ipv4/af_inet.c2
-rw-r--r--net/ipv4/fib_semantics.c2
-rw-r--r--net/ipv4/inet_connection_sock.c43
-rw-r--r--net/ipv4/inet_hashtables.c25
-rw-r--r--net/ipv4/inet_timewait_sock.c39
-rw-r--r--net/ipv4/metrics.c2
-rw-r--r--net/ipv4/tcp.c2
-rw-r--r--net/ipv4/tcp_bpf.c4
-rw-r--r--net/ipv4/tcp_ulp.c4
-rw-r--r--net/ipv6/addrconf.c59
-rw-r--r--net/ipv6/af_inet6.c1
-rw-r--r--net/ipv6/datagram.c2
-rw-r--r--net/ipv6/ip6_output.c15
-rw-r--r--net/ipv6/raw.c4
-rw-r--r--net/ipv6/tcp_ipv6.c11
-rw-r--r--net/key/af_key.c2
-rw-r--r--net/l2tp/l2tp_core.c102
-rw-r--r--net/mac80211/agg-tx.c8
-rw-r--r--net/mac80211/cfg.c7
-rw-r--r--net/mac80211/debugfs_sta.c5
-rw-r--r--net/mac80211/driver-ops.c3
-rw-r--r--net/mac80211/driver-ops.h2
-rw-r--r--net/mac80211/ht.c31
-rw-r--r--net/mac80211/ieee80211_i.h2
-rw-r--r--net/mac80211/iface.c5
-rw-r--r--net/mac80211/rx.c225
-rw-r--r--net/mac80211/tx.c34
-rw-r--r--net/mac80211/util.c42
-rw-r--r--net/mac802154/rx.c1
-rw-r--r--net/mctp/af_mctp.c16
-rw-r--r--net/mctp/route.c34
-rw-r--r--net/mpls/af_mpls.c4
-rw-r--r--net/mptcp/pm.c25
-rw-r--r--net/mptcp/pm_netlink.c10
-rw-r--r--net/mptcp/pm_userspace.c7
-rw-r--r--net/mptcp/protocol.c31
-rw-r--r--net/mptcp/protocol.h10
-rw-r--r--net/mptcp/sockopt.c11
-rw-r--r--net/mptcp/subflow.c40
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c4
-rw-r--r--net/netfilter/ipset/ip_set_core.c7
-rw-r--r--net/netfilter/ipset/ip_set_hash_ip.c14
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipmark.c13
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipport.c13
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportip.c13
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c13
-rw-r--r--net/netfilter/ipset/ip_set_hash_net.c17
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c15
-rw-r--r--net/netfilter/ipset/ip_set_hash_netnet.c23
-rw-r--r--net/netfilter/ipset/ip_set_hash_netport.c19
-rw-r--r--net/netfilter/ipset/ip_set_hash_netportnet.c40
-rw-r--r--net/netfilter/nf_conntrack_proto.c7
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c167
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c15
-rw-r--r--net/netfilter/nf_conntrack_standalone.c16
-rw-r--r--net/netfilter/nf_tables_api.c261
-rw-r--r--net/netfilter/nft_payload.c2
-rw-r--r--net/netfilter/nft_set_rbtree.c332
-rw-r--r--net/netlink/af_netlink.c38
-rw-r--r--net/netrom/af_netrom.c5
-rw-r--r--net/netrom/nr_timer.c1
-rw-r--r--net/nfc/llcp_core.c1
-rw-r--r--net/nfc/netlink.c52
-rw-r--r--net/openvswitch/datapath.c20
-rw-r--r--net/openvswitch/meter.c4
-rw-r--r--net/qrtr/ns.c5
-rw-r--r--net/rds/message.c6
-rw-r--r--net/rose/af_rose.c8
-rw-r--r--net/rxrpc/Makefile1
-rw-r--r--net/rxrpc/af_rxrpc.c27
-rw-r--r--net/rxrpc/ar-internal.h212
-rw-r--r--net/rxrpc/call_accept.c57
-rw-r--r--net/rxrpc/call_event.c86
-rw-r--r--net/rxrpc/call_object.c116
-rw-r--r--net/rxrpc/call_state.c69
-rw-r--r--net/rxrpc/conn_client.c709
-rw-r--r--net/rxrpc/conn_event.c382
-rw-r--r--net/rxrpc/conn_object.c67
-rw-r--r--net/rxrpc/conn_service.c1
-rw-r--r--net/rxrpc/input.c175
-rw-r--r--net/rxrpc/insecure.c20
-rw-r--r--net/rxrpc/io_thread.c204
-rw-r--r--net/rxrpc/local_object.c35
-rw-r--r--net/rxrpc/net_ns.c17
-rw-r--r--net/rxrpc/output.c60
-rw-r--r--net/rxrpc/peer_object.c23
-rw-r--r--net/rxrpc/proc.c17
-rw-r--r--net/rxrpc/recvmsg.c270
-rw-r--r--net/rxrpc/rxkad.c356
-rw-r--r--net/rxrpc/rxperf.c17
-rw-r--r--net/rxrpc/security.c53
-rw-r--r--net/rxrpc/sendmsg.c195
-rw-r--r--net/sched/act_ctinfo.c6
-rw-r--r--net/sched/act_mpls.c8
-rw-r--r--net/sched/cls_tcindex.c46
-rw-r--r--net/sched/sch_api.c5
-rw-r--r--net/sched/sch_atm.c5
-rw-r--r--net/sched/sch_cbq.c4
-rw-r--r--net/sched/sch_gred.c2
-rw-r--r--net/sched/sch_htb.c40
-rw-r--r--net/sched/sch_taprio.c2
-rw-r--r--net/sctp/bind_addr.c6
-rw-r--r--net/sctp/diag.c4
-rw-r--r--net/sctp/transport.c4
-rw-r--r--net/socket.c15
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c19
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c4
-rw-r--r--net/sunrpc/svc.c6
-rw-r--r--net/sunrpc/svc_xprt.c2
-rw-r--r--net/sunrpc/svcsock.c8
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c2
-rw-r--r--net/tipc/node.c12
-rw-r--r--net/tipc/socket.c2
-rw-r--r--net/tls/tls_sw.c2
-rw-r--r--net/unix/af_unix.c8
-rw-r--r--net/x25/af_x25.c6
-rw-r--r--net/xfrm/xfrm_compat.c4
-rw-r--r--net/xfrm/xfrm_input.c3
-rw-r--r--net/xfrm/xfrm_interface_core.c54
-rw-r--r--net/xfrm/xfrm_policy.c14
-rw-r--r--net/xfrm/xfrm_state.c18
-rw-r--r--rust/Makefile15
-rw-r--r--rust/alloc/borrow.rs498
-rw-r--r--rust/alloc/lib.rs3
-rw-r--r--rust/alloc/vec/mod.rs3
-rw-r--r--rust/bindings/bindings_helper.h1
-rw-r--r--rust/bindings/lib.rs1
-rw-r--r--rust/compiler_builtins.rs5
-rw-r--r--rust/helpers.c19
-rw-r--r--rust/kernel/lib.rs6
-rw-r--r--rust/kernel/prelude.rs8
-rw-r--r--rust/kernel/print.rs29
-rw-r--r--rust/kernel/sync.rs10
-rw-r--r--rust/kernel/sync/arc.rs524
-rw-r--r--rust/kernel/types.rs215
-rw-r--r--samples/ftrace/ftrace-direct-multi-modify.c1
-rw-r--r--samples/ftrace/ftrace-direct-multi.c1
-rw-r--r--scripts/Makefile.modinst6
-rw-r--r--scripts/Makefile.modpost9
-rw-r--r--scripts/Makefile.vmlinux1
-rw-r--r--[-rwxr-xr-x]scripts/atomic/atomics.tbl0
-rw-r--r--scripts/gcc-plugins/gcc-common.h4
-rw-r--r--scripts/gdb/linux/cpus.py2
-rwxr-xr-xscripts/jobserver-exec19
-rw-r--r--scripts/kconfig/.gitignore2
-rw-r--r--scripts/kconfig/Makefile2
-rwxr-xr-xscripts/package/mkspec2
-rwxr-xr-xscripts/tracing/ftrace-bisect.sh34
-rw-r--r--security/apparmor/apparmorfs.c2
-rw-r--r--security/apparmor/domain.c4
-rw-r--r--security/apparmor/file.c2
-rw-r--r--security/apparmor/lsm.c16
-rw-r--r--security/apparmor/policy_compat.c3
-rw-r--r--security/commoncap.c68
-rw-r--r--security/integrity/evm/evm_crypto.c10
-rw-r--r--security/integrity/evm/evm_main.c46
-rw-r--r--security/integrity/evm/evm_secfs.c2
-rw-r--r--security/integrity/ima/ima.h10
-rw-r--r--security/integrity/ima/ima_api.c6
-rw-r--r--security/integrity/ima/ima_appraise.c18
-rw-r--r--security/integrity/ima/ima_asymmetric_keys.c2
-rw-r--r--security/integrity/ima/ima_main.c26
-rw-r--r--security/integrity/ima/ima_policy.c14
-rw-r--r--security/integrity/ima/ima_queue_keys.c2
-rw-r--r--security/integrity/ima/ima_template_lib.c2
-rw-r--r--security/keys/key.c137
-rw-r--r--security/security.c46
-rw-r--r--security/selinux/hooks.c22
-rw-r--r--security/smack/smack_lsm.c30
-rw-r--r--security/tomoyo/Kconfig4
-rw-r--r--security/tomoyo/Makefile19
-rw-r--r--sound/core/control.c24
-rw-r--r--sound/core/control_led.c5
-rw-r--r--sound/core/memalloc.c87
-rw-r--r--sound/firewire/motu/motu-hwdep.c4
-rw-r--r--sound/pci/hda/cs35l41_hda.c20
-rw-r--r--sound/pci/hda/hda_bind.c2
-rw-r--r--sound/pci/hda/hda_codec.c3
-rw-r--r--sound/pci/hda/patch_conexant.c1
-rw-r--r--sound/pci/hda/patch_hdmi.c1
-rw-r--r--sound/pci/hda/patch_realtek.c75
-rw-r--r--sound/pci/hda/patch_via.c3
-rw-r--r--sound/pci/lx6464es/lx_core.c11
-rw-r--r--sound/soc/amd/acp-es8336.c6
-rw-r--r--sound/soc/amd/yc/acp6x-mach.c35
-rw-r--r--sound/soc/codecs/cs42l56.c6
-rw-r--r--[-rwxr-xr-x]sound/soc/codecs/es8326.c6
-rw-r--r--[-rwxr-xr-x]sound/soc/codecs/es8326.h0
-rw-r--r--sound/soc/codecs/rt715-sdca-sdw.c2
-rw-r--r--sound/soc/codecs/rt9120.c12
-rw-r--r--sound/soc/codecs/tas5805m.c131
-rw-r--r--sound/soc/codecs/wm8904.c7
-rw-r--r--sound/soc/codecs/wsa883x.c4
-rw-r--r--sound/soc/fsl/fsl-asoc-card.c8
-rw-r--r--sound/soc/fsl/fsl_micfil.c16
-rw-r--r--sound/soc/fsl/fsl_sai.c1
-rw-r--r--sound/soc/fsl/fsl_ssi.c4
-rw-r--r--sound/soc/intel/avs/core.c24
-rw-r--r--sound/soc/intel/boards/Kconfig2
-rw-r--r--sound/soc/intel/boards/bytcht_es8316.c20
-rw-r--r--sound/soc/intel/boards/bytcr_rt5640.c12
-rw-r--r--sound/soc/intel/boards/bytcr_rt5651.c2
-rw-r--r--sound/soc/intel/boards/bytcr_wm5102.c2
-rw-r--r--sound/soc/intel/boards/sof_cs42l42.c3
-rw-r--r--sound/soc/intel/boards/sof_es8336.c14
-rw-r--r--sound/soc/intel/boards/sof_nau8825.c36
-rw-r--r--sound/soc/intel/boards/sof_rt5682.c5
-rw-r--r--sound/soc/intel/boards/sof_ssp_amp.c5
-rw-r--r--sound/soc/intel/common/soc-acpi-intel-adl-match.c20
-rw-r--r--sound/soc/intel/common/soc-acpi-intel-rpl-match.c50
-rw-r--r--sound/soc/mediatek/Kconfig4
-rw-r--r--sound/soc/mediatek/mt8186/mt8186-mt6366-rt1019-rt5682s.c22
-rw-r--r--sound/soc/qcom/Kconfig21
-rw-r--r--sound/soc/qcom/Makefile2
-rw-r--r--sound/soc/qcom/common.c114
-rw-r--r--sound/soc/qcom/common.h10
-rw-r--r--sound/soc/qcom/lpass-cpu.c5
-rw-r--r--sound/soc/qcom/sc8280xp.c1
-rw-r--r--sound/soc/qcom/sdw.c123
-rw-r--r--sound/soc/qcom/sdw.h18
-rw-r--r--sound/soc/qcom/sm8250.c1
-rw-r--r--sound/soc/soc-topology.c8
-rw-r--r--sound/soc/sof/amd/acp.c36
-rw-r--r--sound/soc/sof/debug.c4
-rw-r--r--sound/soc/sof/intel/hda-dai.c8
-rw-r--r--sound/soc/sof/ipc4-mtrace.c7
-rw-r--r--sound/soc/sof/ops.h2
-rw-r--r--sound/soc/sof/pm.c9
-rw-r--r--sound/soc/sof/sof-audio.c16
-rw-r--r--sound/synth/emux/emux_nrpn.c3
-rw-r--r--sound/usb/implicit.c3
-rw-r--r--sound/usb/pcm.c222
-rw-r--r--sound/usb/quirks.c2
-rw-r--r--sound/usb/stream.c6
-rw-r--r--sound/xen/xen_snd_front.c3
-rw-r--r--tools/arch/arm64/include/asm/cputype.h8
-rw-r--r--tools/arch/arm64/include/uapi/asm/kvm.h1
-rw-r--r--tools/arch/x86/include/uapi/asm/kvm.h5
-rw-r--r--tools/bpf/bpftool/Makefile4
-rw-r--r--tools/gpio/gpio-event-mon.c1
-rw-r--r--tools/include/linux/build_bug.h9
-rw-r--r--tools/include/nolibc/arch-mips.h2
-rw-r--r--tools/include/nolibc/arch-riscv.h14
-rw-r--r--tools/include/nolibc/ctype.h3
-rw-r--r--tools/include/nolibc/errno.h3
-rw-r--r--tools/include/nolibc/signal.h3
-rw-r--r--tools/include/nolibc/stdio.h3
-rw-r--r--tools/include/nolibc/stdlib.h3
-rw-r--r--tools/include/nolibc/string.h8
-rw-r--r--tools/include/nolibc/sys.h2
-rw-r--r--tools/include/nolibc/time.h3
-rw-r--r--tools/include/nolibc/types.h70
-rw-r--r--tools/include/nolibc/unistd.h3
-rw-r--r--tools/include/uapi/linux/kvm.h3
-rw-r--r--tools/objtool/check.c9
-rw-r--r--tools/perf/Documentation/Makefile2
-rw-r--r--tools/perf/Makefile.config12
-rw-r--r--tools/perf/Makefile.perf31
-rw-r--r--tools/perf/arch/riscv/util/unwind-libdw.c2
-rw-r--r--tools/perf/builtin-kmem.c65
-rw-r--r--tools/perf/builtin-lock.c2
-rw-r--r--tools/perf/builtin-trace.c2
-rw-r--r--tools/perf/command-list.txt10
-rw-r--r--tools/perf/tests/bpf-script-test-prologue.c2
-rw-r--r--tools/perf/tests/make12
-rwxr-xr-xtools/perf/tests/shell/buildid.sh15
-rwxr-xr-xtools/perf/tests/shell/record+probe_libc_inet_pton.sh3
-rw-r--r--tools/perf/trace/beauty/include/linux/socket.h5
-rwxr-xr-xtools/perf/util/PERF-VERSION-GEN2
-rw-r--r--tools/perf/util/auxtrace.c2
-rw-r--r--tools/perf/util/bpf_counter.h6
-rw-r--r--tools/perf/util/bpf_counter_cgroup.c14
-rw-r--r--tools/perf/util/build-id.c10
-rw-r--r--tools/perf/util/cgroup.c23
-rw-r--r--tools/perf/util/data.c2
-rw-r--r--tools/perf/util/expr.l5
-rwxr-xr-xtools/perf/util/generate-cmdlist.sh19
-rw-r--r--tools/perf/util/sort.c12
-rw-r--r--tools/perf/util/trace-event.h1
-rw-r--r--tools/testing/memblock/.gitignore1
-rw-r--r--tools/testing/memblock/Makefile3
-rw-r--r--tools/testing/selftests/amd-pstate/Makefile5
-rw-r--r--tools/testing/selftests/bpf/.gitignore1
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.s390x1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter.c73
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_dump.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/decap_sanity.c85
-rw-r--r--tools/testing/selftests/bpf/prog_tests/jeq_infer_not_null.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_listen.c81
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_tracing_net.h6
-rw-r--r--tools/testing/selftests/bpf/progs/decap_sanity.c68
-rw-r--r--tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c42
-rw-r--r--tools/testing/selftests/bpf/progs/profiler.inc.h2
-rw-r--r--tools/testing/selftests/bpf/verifier/search_pruning.c36
-rwxr-xr-xtools/testing/selftests/cgroup/test_cpuset_prs.sh1
-rwxr-xr-xtools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh2
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/filesystems/fat/run_fat_tests.sh0
-rw-r--r--tools/testing/selftests/kvm/aarch64/page_fault_test.c187
-rw-r--r--tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c7
-rw-r--r--tools/testing/selftests/lib.mk2
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/af_unix/test_unix_oob.c2
-rwxr-xr-xtools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh15
-rw-r--r--tools/testing/selftests/net/bind_timewait.c92
-rwxr-xr-xtools/testing/selftests/net/cmsg_ipv6.sh2
-rw-r--r--tools/testing/selftests/net/cmsg_sender.c2
-rwxr-xr-xtools/testing/selftests/net/fib_rule_tests.sh128
-rwxr-xr-xtools/testing/selftests/net/forwarding/lib.sh4
-rwxr-xr-xtools/testing/selftests/net/l2_tos_ttl_inherit.sh202
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh22
-rwxr-xr-xtools/testing/selftests/net/mptcp/userspace_pm.sh47
-rw-r--r--tools/testing/selftests/net/nettest.c51
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_vnifiltering.sh18
-rw-r--r--tools/testing/selftests/net/toeplitz.c12
-rwxr-xr-xtools/testing/selftests/net/udpgso_bench.sh24
-rw-r--r--tools/testing/selftests/net/udpgso_bench_rx.c4
-rw-r--r--tools/testing/selftests/net/udpgso_bench_tx.c36
-rwxr-xr-xtools/testing/selftests/netfilter/nft_trans_stress.sh16
-rw-r--r--tools/testing/selftests/netfilter/settings1
-rw-r--r--tools/testing/selftests/proc/proc-empty-vm.c12
-rw-r--r--tools/testing/selftests/proc/proc-pid-vm.c9
-rw-r--r--tools/testing/selftests/vm/hugetlb-madvise.c1
-rw-r--r--tools/virtio/linux/bug.h8
-rw-r--r--tools/virtio/linux/build_bug.h7
-rw-r--r--tools/virtio/linux/cpumask.h7
-rw-r--r--tools/virtio/linux/gfp.h7
-rw-r--r--tools/virtio/linux/kernel.h1
-rw-r--r--tools/virtio/linux/kmsan.h12
-rw-r--r--tools/virtio/linux/scatterlist.h1
-rw-r--r--tools/virtio/linux/topology.h7
-rw-r--r--tools/virtio/ringtest/main.h37
-rw-r--r--tools/virtio/virtio-trace/trace-agent-ctl.c2
-rw-r--r--tools/virtio/virtio_test.c2
-rw-r--r--tools/virtio/vringh_test.c2
-rw-r--r--virt/kvm/kvm_main.c7
-rw-r--r--virt/kvm/vfio.c6
2156 files changed, 30865 insertions, 20545 deletions
diff --git a/.mailmap b/.mailmap
index ccba4cf0d893..a872c9683958 100644
--- a/.mailmap
+++ b/.mailmap
@@ -25,6 +25,8 @@ Aleksey Gorelov <aleksey_gorelov@phoenix.com>
Alexander Lobakin <alobakin@pm.me> <alobakin@dlink.ru>
Alexander Lobakin <alobakin@pm.me> <alobakin@marvell.com>
Alexander Lobakin <alobakin@pm.me> <bloodyreaper@yandex.ru>
+Alexander Mikhalitsyn <alexander@mihalicyn.com> <alexander.mikhalitsyn@virtuozzo.com>
+Alexander Mikhalitsyn <alexander@mihalicyn.com> <aleksandr.mikhalitsyn@canonical.com>
Alexandre Belloni <alexandre.belloni@bootlin.com> <alexandre.belloni@free-electrons.com>
Alexei Starovoitov <ast@kernel.org> <alexei.starovoitov@gmail.com>
Alexei Starovoitov <ast@kernel.org> <ast@fb.com>
@@ -130,6 +132,7 @@ Domen Puncer <domen@coderock.org>
Douglas Gilbert <dougg@torque.net>
Ed L. Cashin <ecashin@coraid.com>
Erik Kaneda <erik.kaneda@intel.com> <erik.schmauss@intel.com>
+Eugen Hristev <eugen.hristev@collabora.com> <eugen.hristev@microchip.com>
Evgeniy Polyakov <johnpol@2ka.mipt.ru>
Ezequiel Garcia <ezequiel@vanguardiasur.com.ar> <ezequiel@collabora.com>
Felipe W Damasio <felipewd@terra.com.br>
@@ -214,6 +217,7 @@ Jisheng Zhang <jszhang@kernel.org> <jszhang@marvell.com>
Jisheng Zhang <jszhang@kernel.org> <Jisheng.Zhang@synaptics.com>
Johan Hovold <johan@kernel.org> <jhovold@gmail.com>
Johan Hovold <johan@kernel.org> <johan@hovoldconsulting.com>
+John Crispin <john@phrozen.org> <blogic@openwrt.org>
John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
John Stultz <johnstul@us.ibm.com>
Jordan Crouse <jordan@cosmicpenguin.net> <jcrouse@codeaurora.org>
@@ -371,6 +375,7 @@ Rémi Denis-Courmont <rdenis@simphalempin.com>
Ricardo Ribalda <ribalda@kernel.org> <ricardo@ribalda.com>
Ricardo Ribalda <ribalda@kernel.org> Ricardo Ribalda Delgado <ribalda@kernel.org>
Ricardo Ribalda <ribalda@kernel.org> <ricardo.ribalda@gmail.com>
+Robert Foss <rfoss@kernel.org> <robert.foss@linaro.org>
Roman Gushchin <roman.gushchin@linux.dev> <guro@fb.com>
Roman Gushchin <roman.gushchin@linux.dev> <guroan@gmail.com>
Roman Gushchin <roman.gushchin@linux.dev> <klamm@yandex-team.ru>
@@ -422,6 +427,7 @@ Tony Luck <tony.luck@intel.com>
TripleX Chung <xxx.phy@gmail.com> <triplex@zh-kernel.org>
TripleX Chung <xxx.phy@gmail.com> <zhongyu@18mail.cn>
Tsuneo Yoshioka <Tsuneo.Yoshioka@f-secure.com>
+Tudor Ambarus <tudor.ambarus@linaro.org> <tudor.ambarus@microchip.com>
Tycho Andersen <tycho@tycho.pizza> <tycho@tycho.ws>
Tzung-Bi Shih <tzungbi@kernel.org> <tzungbi@google.com>
Uwe Kleine-König <ukleinek@informatik.uni-freiburg.de>
diff --git a/CREDITS b/CREDITS
index 4e302a459ddf..5f5d70c9c038 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1173,6 +1173,10 @@ D: Future Domain TMC-16x0 SCSI driver (author)
D: APM driver (early port)
D: DRM drivers (author of several)
+N: Veaceslav Falico
+E: vfalico@gmail.com
+D: Co-maintainer and co-author of the network bonding driver.
+
N: János Farkas
E: chexum@shadow.banki.hu
D: romfs, various (mostly networking) fixes
@@ -2489,6 +2493,13 @@ D: XF86_Mach8
D: XF86_8514
D: cfdisk (curses based disk partitioning program)
+N: Mat Martineau
+E: mat@martineau.name
+D: MPTCP subsystem co-maintainer 2020-2023
+D: Keyctl restricted keyring and Diffie-Hellman UAPI
+D: Bluetooth L2CAP ERTM mode and AMP
+S: USA
+
N: John S. Marvin
E: jsm@fc.hp.com
D: PA-RISC port
@@ -4172,6 +4183,10 @@ S: B-1206 Jingmao Guojigongyu
S: 16 Baliqiao Nanjie, Beijing 101100
S: People's Repulic of China
+N: Vlad Yasevich
+E: vyasevich@gmail.com
+D: SCTP protocol maintainer.
+
N: Aviad Yehezkel
E: aviadye@nvidia.com
D: Kernel TLS implementation and offload support.
diff --git a/Documentation/ABI/testing/debugfs-pktcdvd b/Documentation/ABI/testing/debugfs-pktcdvd
new file mode 100644
index 000000000000..f6f65a4faea0
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-pktcdvd
@@ -0,0 +1,18 @@
+What: /sys/kernel/debug/pktcdvd/pktcdvd[0-7]
+Date: Oct. 2006
+KernelVersion: 2.6.20
+Contact: Thomas Maier <balagi@justmail.de>
+Description:
+
+The pktcdvd module (packet writing driver) creates
+these files in debugfs:
+
+/sys/kernel/debug/pktcdvd/pktcdvd[0-7]/
+
+ ==== ====== ====================================
+ info 0444 Lots of driver statistics and infos.
+ ==== ====== ====================================
+
+Example::
+
+ cat /sys/kernel/debug/pktcdvd/pktcdvd0/info
diff --git a/Documentation/ABI/testing/sysfs-class-pktcdvd b/Documentation/ABI/testing/sysfs-class-pktcdvd
new file mode 100644
index 000000000000..ba1ce626591d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-pktcdvd
@@ -0,0 +1,97 @@
+sysfs interface
+---------------
+The pktcdvd module (packet writing driver) creates the following files in the
+sysfs: (<devid> is in the format major:minor)
+
+What: /sys/class/pktcdvd/add
+What: /sys/class/pktcdvd/remove
+What: /sys/class/pktcdvd/device_map
+Date: Oct. 2006
+KernelVersion: 2.6.20
+Contact: Thomas Maier <balagi@justmail.de>
+Description:
+
+ ========== ==============================================
+ add (WO) Write a block device id (major:minor) to
+ create a new pktcdvd device and map it to the
+ block device.
+
+ remove (WO) Write the pktcdvd device id (major:minor)
+ to remove the pktcdvd device.
+
+ device_map (RO) Shows the device mapping in format:
+ pktcdvd[0-7] <pktdevid> <blkdevid>
+ ========== ==============================================
+
+
+What: /sys/class/pktcdvd/pktcdvd[0-7]/dev
+What: /sys/class/pktcdvd/pktcdvd[0-7]/uevent
+Date: Oct. 2006
+KernelVersion: 2.6.20
+Contact: Thomas Maier <balagi@justmail.de>
+Description:
+ dev: (RO) Device id
+
+ uevent: (WO) To send a uevent
+
+
+What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/packets_started
+What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/packets_finished
+What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/kb_written
+What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/kb_read
+What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/kb_read_gather
+What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/reset
+Date: Oct. 2006
+KernelVersion: 2.6.20
+Contact: Thomas Maier <balagi@justmail.de>
+Description:
+ packets_started: (RO) Number of started packets.
+
+ packets_finished: (RO) Number of finished packets.
+
+ kb_written: (RO) kBytes written.
+
+ kb_read: (RO) kBytes read.
+
+ kb_read_gather: (RO) kBytes read to fill write packets.
+
+ reset: (WO) Write any value to it to reset
+ pktcdvd device statistic values, like
+ bytes read/written.
+
+
+What: /sys/class/pktcdvd/pktcdvd[0-7]/write_queue/size
+What: /sys/class/pktcdvd/pktcdvd[0-7]/write_queue/congestion_off
+What: /sys/class/pktcdvd/pktcdvd[0-7]/write_queue/congestion_on
+Date: Oct. 2006
+KernelVersion: 2.6.20
+Contact: Thomas Maier <balagi@justmail.de>
+Description:
+ ============== ================================================
+ size (RO) Contains the size of the bio write queue.
+
+ congestion_off (RW) If bio write queue size is below this mark,
+ accept new bio requests from the block layer.
+
+ congestion_on (RW) If bio write queue size is higher as this
+ mark, do no longer accept bio write requests
+ from the block layer and wait till the pktcdvd
+ device has processed enough bio's so that bio
+ write queue size is below congestion off mark.
+ A value of <= 0 disables congestion control.
+ ============== ================================================
+
+
+Example:
+--------
+To use the pktcdvd sysfs interface directly, you can do::
+
+ # create a new pktcdvd device mapped to /dev/hdc
+ echo "22:0" >/sys/class/pktcdvd/add
+ cat /sys/class/pktcdvd/device_map
+ # assuming device pktcdvd0 was created, look at stat's
+ cat /sys/class/pktcdvd/pktcdvd0/stat/kb_written
+ # print the device id of the mapped block device
+ fgrep pktcdvd0 /sys/class/pktcdvd/device_map
+ # remove device, using pktcdvd0 device id 253:0
+ echo "253:0" >/sys/class/pktcdvd/remove
diff --git a/Documentation/ABI/testing/sysfs-fs-erofs b/Documentation/ABI/testing/sysfs-fs-erofs
index bb4681a01811..284224d1b56f 100644
--- a/Documentation/ABI/testing/sysfs-fs-erofs
+++ b/Documentation/ABI/testing/sysfs-fs-erofs
@@ -4,7 +4,8 @@ Contact: "Huang Jianan" <huangjianan@oppo.com>
Description: Shows all enabled kernel features.
Supported features:
zero_padding, compr_cfgs, big_pcluster, chunked_file,
- device_table, compr_head2, sb_chksum.
+ device_table, compr_head2, sb_chksum, ztailpacking,
+ dedupe, fragments.
What: /sys/fs/erofs/<disk>/sync_decompress
Date: November 2021
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index c8ae7c897f14..74cec76be9f2 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1245,13 +1245,17 @@ PAGE_SIZE multiple when read back.
This is a simple interface to trigger memory reclaim in the
target cgroup.
- This file accepts a string which contains the number of bytes to
- reclaim.
+ This file accepts a single key, the number of bytes to reclaim.
+ No nested keys are currently supported.
Example::
echo "1G" > memory.reclaim
+ The interface can be later extended with nested keys to
+ configure the reclaim behavior. For example, specify the
+ type of memory to reclaim from (anon, file, ..).
+
Please note that the kernel can over or under reclaim from
the target cgroup. If less bytes are reclaimed than the
specified amount, -EAGAIN is returned.
@@ -1263,13 +1267,6 @@ PAGE_SIZE multiple when read back.
This means that the networking layer will not adapt based on
reclaim induced by memory.reclaim.
- This file also allows the user to specify the nodes to reclaim from,
- via the 'nodes=' key, for example::
-
- echo "1G nodes=0,1" > memory.reclaim
-
- The above instructs the kernel to reclaim memory from nodes 0,1.
-
memory.peak
A read-only single value file which exists on non-root
cgroups.
diff --git a/Documentation/admin-guide/hw-vuln/cross-thread-rsb.rst b/Documentation/admin-guide/hw-vuln/cross-thread-rsb.rst
new file mode 100644
index 000000000000..875616d675fe
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/cross-thread-rsb.rst
@@ -0,0 +1,91 @@
+
+.. SPDX-License-Identifier: GPL-2.0
+
+Cross-Thread Return Address Predictions
+=======================================
+
+Certain AMD and Hygon processors are subject to a cross-thread return address
+predictions vulnerability. When running in SMT mode and one sibling thread
+transitions out of C0 state, the other sibling thread could use return target
+predictions from the sibling thread that transitioned out of C0.
+
+The Spectre v2 mitigations protect the Linux kernel, as it fills the return
+address prediction entries with safe targets when context switching to the idle
+thread. However, KVM does allow a VMM to prevent exiting guest mode when
+transitioning out of C0. This could result in a guest-controlled return target
+being consumed by the sibling thread.
+
+Affected processors
+-------------------
+
+The following CPUs are vulnerable:
+
+ - AMD Family 17h processors
+ - Hygon Family 18h processors
+
+Related CVEs
+------------
+
+The following CVE entry is related to this issue:
+
+ ============== =======================================
+ CVE-2022-27672 Cross-Thread Return Address Predictions
+ ============== =======================================
+
+Problem
+-------
+
+Affected SMT-capable processors support 1T and 2T modes of execution when SMT
+is enabled. In 2T mode, both threads in a core are executing code. For the
+processor core to enter 1T mode, it is required that one of the threads
+requests to transition out of the C0 state. This can be communicated with the
+HLT instruction or with an MWAIT instruction that requests non-C0.
+When the thread re-enters the C0 state, the processor transitions back
+to 2T mode, assuming the other thread is also still in C0 state.
+
+In affected processors, the return address predictor (RAP) is partitioned
+depending on the SMT mode. For instance, in 2T mode each thread uses a private
+16-entry RAP, but in 1T mode, the active thread uses a 32-entry RAP. Upon
+transition between 1T/2T mode, the RAP contents are not modified but the RAP
+pointers (which control the next return target to use for predictions) may
+change. This behavior may result in return targets from one SMT thread being
+used by RET predictions in the sibling thread following a 1T/2T switch. In
+particular, a RET instruction executed immediately after a transition to 1T may
+use a return target from the thread that just became idle. In theory, this
+could lead to information disclosure if the return targets used do not come
+from trustworthy code.
+
+Attack scenarios
+----------------
+
+An attack can be mounted on affected processors by performing a series of CALL
+instructions with targeted return locations and then transitioning out of C0
+state.
+
+Mitigation mechanism
+--------------------
+
+Before entering idle state, the kernel context switches to the idle thread. The
+context switch fills the RAP entries (referred to as the RSB in Linux) with safe
+targets by performing a sequence of CALL instructions.
+
+Prevent a guest VM from directly putting the processor into an idle state by
+intercepting HLT and MWAIT instructions.
+
+Both mitigations are required to fully address this issue.
+
+Mitigation control on the kernel command line
+---------------------------------------------
+
+Use existing Spectre v2 mitigations that will fill the RSB on context switch.
+
+Mitigation control for KVM - module parameter
+---------------------------------------------
+
+By default, the KVM hypervisor mitigates this issue by intercepting guest
+attempts to transition out of C0. A VMM can use the KVM_CAP_X86_DISABLE_EXITS
+capability to override those interceptions, but since this is not common, the
+mitigation that covers this path is not enabled by default.
+
+The mitigation for the KVM_CAP_X86_DISABLE_EXITS capability can be turned on
+using the boolean module parameter mitigate_smt_rsb, e.g. ``kvm.mitigate_smt_rsb=1``.
diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst
index 4df436e7c417..e0614760a99e 100644
--- a/Documentation/admin-guide/hw-vuln/index.rst
+++ b/Documentation/admin-guide/hw-vuln/index.rst
@@ -18,3 +18,4 @@ are configurable at compile, boot or run time.
core-scheduling.rst
l1d_flush.rst
processor_mmio_stale_data.rst
+ cross-thread-rsb.rst
diff --git a/Documentation/admin-guide/mm/zswap.rst b/Documentation/admin-guide/mm/zswap.rst
index f67de481c7f6..6dd74a18268b 100644
--- a/Documentation/admin-guide/mm/zswap.rst
+++ b/Documentation/admin-guide/mm/zswap.rst
@@ -70,9 +70,7 @@ e.g. ``zswap.zpool=zbud``. It can also be changed at runtime using the sysfs
The zbud type zpool allocates exactly 1 page to store 2 compressed pages, which
means the compression ratio will always be 2:1 or worse (because of half-full
zbud pages). The zsmalloc type zpool has a more complex compressed page
-storage method, and it can achieve greater storage densities. However,
-zsmalloc does not implement compressed page eviction, so once zswap fills it
-cannot evict the oldest page, it can only reject new pages.
+storage method, and it can achieve greater storage densities.
When a swap page is passed from frontswap to zswap, zswap maintains a mapping
of the swap entry, a combination of the swap type and swap offset, to the zpool
diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
index 808ade4cc008..ec5f889d7681 100644
--- a/Documentation/arm64/silicon-errata.rst
+++ b/Documentation/arm64/silicon-errata.rst
@@ -120,6 +120,8 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A710 | #2224489 | ARM64_ERRATUM_2224489 |
+----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A715 | #2645198 | ARM64_ERRATUM_2645198 |
++----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-X2 | #2119858 | ARM64_ERRATUM_2119858 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-X2 | #2224489 | ARM64_ERRATUM_2224489 |
diff --git a/Documentation/conf.py b/Documentation/conf.py
index a5c45df0bd83..d927737e3c10 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -31,6 +31,12 @@ def have_command(cmd):
# Get Sphinx version
major, minor, patch = sphinx.version_info[:3]
+#
+# Warn about older versions that we don't want to support for much
+# longer.
+#
+if (major < 2) or (major == 2 and minor < 4):
+ print('WARNING: support for Sphinx < 2.4 will be removed soon.')
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
@@ -339,7 +345,11 @@ html_use_smartypants = False
# Custom sidebar templates, maps document names to template names.
# Note that the RTD theme ignores this
-html_sidebars = { '**': ["about.html", 'searchbox.html', 'localtoc.html', 'sourcelink.html']}
+html_sidebars = { '**': ['searchbox.html', 'localtoc.html', 'sourcelink.html']}
+
+# about.html is available for alabaster theme. Add it at the front.
+if html_theme == 'alabaster':
+ html_sidebars['**'].insert(0, 'about.html')
# Output file base name for HTML help builder.
htmlhelp_basename = 'TheLinuxKerneldoc'
diff --git a/Documentation/devicetree/bindings/.gitignore b/Documentation/devicetree/bindings/.gitignore
index a77719968a7e..51ddb26d93f0 100644
--- a/Documentation/devicetree/bindings/.gitignore
+++ b/Documentation/devicetree/bindings/.gitignore
@@ -2,3 +2,8 @@
*.example.dts
/processed-schema*.yaml
/processed-schema*.json
+
+#
+# We don't want to ignore the following even if they are dot-files
+#
+!.yamllint
diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml
index 903b31129f01..99e159bc5fb1 100644
--- a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml
+++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml
@@ -54,6 +54,17 @@ properties:
- const: xo
- const: alternate
+ interrupts:
+ minItems: 1
+ maxItems: 3
+
+ interrupt-names:
+ minItems: 1
+ items:
+ - const: dcvsh-irq-0
+ - const: dcvsh-irq-1
+ - const: dcvsh-irq-2
+
'#freq-domain-cells':
const: 1
diff --git a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-aes.yaml b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-aes.yaml
index 0ccaab16dc61..0b7383b3106b 100644
--- a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-aes.yaml
+++ b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-aes.yaml
@@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Atmel Advanced Encryption Standard (AES) HW cryptographic accelerator
maintainers:
- - Tudor Ambarus <tudor.ambarus@microchip.com>
+ - Tudor Ambarus <tudor.ambarus@linaro.org>
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-sha.yaml b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-sha.yaml
index 5163c51b4547..ee2ffb034325 100644
--- a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-sha.yaml
+++ b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-sha.yaml
@@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Atmel Secure Hash Algorithm (SHA) HW cryptographic accelerator
maintainers:
- - Tudor Ambarus <tudor.ambarus@microchip.com>
+ - Tudor Ambarus <tudor.ambarus@linaro.org>
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-tdes.yaml b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-tdes.yaml
index fcc5adf03cad..3d6ed24b1b00 100644
--- a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-tdes.yaml
+++ b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-tdes.yaml
@@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Atmel Triple Data Encryption Standard (TDES) HW cryptographic accelerator
maintainers:
- - Tudor Ambarus <tudor.ambarus@microchip.com>
+ - Tudor Ambarus <tudor.ambarus@linaro.org>
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml
index f2c143730a55..6e2fd6e9fa7f 100644
--- a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml
+++ b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml
@@ -32,7 +32,7 @@ properties:
- description: Display byte clock
- description: Display byte interface clock
- description: Display pixel clock
- - description: Display escape clock
+ - description: Display core clock
- description: Display AHB clock
- description: Display AXI clock
@@ -137,8 +137,6 @@ required:
- phys
- assigned-clocks
- assigned-clock-parents
- - power-domains
- - operating-points-v2
- ports
additionalProperties: false
diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml b/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml
index d9ad8b659f58..3ec466c3ab38 100644
--- a/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml
+++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml
@@ -69,7 +69,6 @@ required:
- compatible
- reg
- reg-names
- - vdds-supply
unevaluatedProperties: false
diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml b/Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml
index 819de5ce0bc9..a43e11d3b00d 100644
--- a/Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml
+++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml
@@ -39,7 +39,6 @@ required:
- compatible
- reg
- reg-names
- - vcca-supply
unevaluatedProperties: false
diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml b/Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml
index 3d8540a06fe2..2f1fd140c87d 100644
--- a/Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml
+++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml
@@ -34,6 +34,10 @@ properties:
vddio-supply:
description: Phandle to vdd-io regulator device node.
+ qcom,dsi-phy-regulator-ldo-mode:
+ type: boolean
+ description: Indicates if the LDO mode PHY regulator is wanted.
+
required:
- compatible
- reg
diff --git a/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml
index d6f043a4b08d..4795e13c7b59 100644
--- a/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml
+++ b/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml
@@ -72,7 +72,7 @@ examples:
#include <dt-bindings/interconnect/qcom,qcm2290.h>
#include <dt-bindings/power/qcom-rpmpd.h>
- mdss@5e00000 {
+ display-subsystem@5e00000 {
#address-cells = <1>;
#size-cells = <1>;
compatible = "qcom,qcm2290-mdss";
diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm6115-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm6115-mdss.yaml
index a86d7f53fa84..886858ef6700 100644
--- a/Documentation/devicetree/bindings/display/msm/qcom,sm6115-mdss.yaml
+++ b/Documentation/devicetree/bindings/display/msm/qcom,sm6115-mdss.yaml
@@ -62,7 +62,7 @@ examples:
#include <dt-bindings/interrupt-controller/arm-gic.h>
#include <dt-bindings/power/qcom-rpmpd.h>
- mdss@5e00000 {
+ display-subsystem@5e00000 {
#address-cells = <1>;
#size-cells = <1>;
compatible = "qcom,sm6115-mdss";
diff --git a/Documentation/devicetree/bindings/i2c/renesas,rzv2m.yaml b/Documentation/devicetree/bindings/i2c/renesas,rzv2m.yaml
index c46378efc123..92e899905ef8 100644
--- a/Documentation/devicetree/bindings/i2c/renesas,rzv2m.yaml
+++ b/Documentation/devicetree/bindings/i2c/renesas,rzv2m.yaml
@@ -16,7 +16,7 @@ properties:
compatible:
items:
- enum:
- - renesas,i2c-r9a09g011 # RZ/V2M
+ - renesas,r9a09g011-i2c # RZ/V2M
- const: renesas,rzv2m-i2c
reg:
@@ -66,7 +66,7 @@ examples:
#include <dt-bindings/interrupt-controller/arm-gic.h>
i2c0: i2c@a4030000 {
- compatible = "renesas,i2c-r9a09g011", "renesas,rzv2m-i2c";
+ compatible = "renesas,r9a09g011-i2c", "renesas,rzv2m-i2c";
reg = <0xa4030000 0x80>;
interrupts = <GIC_SPI 232 IRQ_TYPE_EDGE_RISING>,
<GIC_SPI 236 IRQ_TYPE_EDGE_RISING>;
diff --git a/Documentation/devicetree/bindings/interconnect/qcom,rpm.yaml b/Documentation/devicetree/bindings/interconnect/qcom,rpm.yaml
index 4b37aa88a375..5e6be4e79201 100644
--- a/Documentation/devicetree/bindings/interconnect/qcom,rpm.yaml
+++ b/Documentation/devicetree/bindings/interconnect/qcom,rpm.yaml
@@ -84,7 +84,6 @@ allOf:
- qcom,msm8939-pcnoc
- qcom,msm8939-snoc
- qcom,msm8996-a1noc
- - qcom,msm8996-a2noc
- qcom,msm8996-bimc
- qcom,msm8996-cnoc
- qcom,msm8996-pnoc
@@ -191,6 +190,29 @@ allOf:
compatible:
contains:
enum:
+ - qcom,msm8996-a2noc
+
+ then:
+ properties:
+ clock-names:
+ items:
+ - const: bus
+ - const: bus_a
+ - const: aggre2_ufs_axi
+ - const: ufs_axi
+
+ clocks:
+ items:
+ - description: Bus Clock
+ - description: Bus A Clock
+ - description: Aggregate2 NoC UFS AXI Clock
+ - description: UFS AXI Clock
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
- qcom,sdm660-a2noc
then:
diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml
index 9f7d3e11aacb..8449e14af9f3 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml
@@ -108,7 +108,7 @@ properties:
msi-controller:
description:
- Only present if the Message Based Interrupt functionnality is
+ Only present if the Message Based Interrupt functionality is
being exposed by the HW, and the mbi-ranges property present.
mbi-ranges:
diff --git a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml
index 1432fda3b603..47bc2057e629 100644
--- a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml
+++ b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml
@@ -40,6 +40,9 @@ properties:
clock-names:
const: stmmaceth
+ phy-supply:
+ description: PHY regulator
+
syscon:
$ref: /schemas/types.yaml#/definitions/phandle
description:
diff --git a/Documentation/devicetree/bindings/net/marvell,orion-mdio.yaml b/Documentation/devicetree/bindings/net/marvell,orion-mdio.yaml
index d2906b4a0f59..e35da8b01dc2 100644
--- a/Documentation/devicetree/bindings/net/marvell,orion-mdio.yaml
+++ b/Documentation/devicetree/bindings/net/marvell,orion-mdio.yaml
@@ -16,9 +16,6 @@ description: |
8k has a second unit which provides an interface with the xMDIO bus. This
driver handles these interfaces.
-allOf:
- - $ref: "mdio.yaml#"
-
properties:
compatible:
enum:
@@ -39,13 +36,38 @@ required:
- compatible
- reg
+allOf:
+ - $ref: mdio.yaml#
+
+ - if:
+ required:
+ - interrupts
+
+ then:
+ properties:
+ reg:
+ items:
+ - items:
+ - $ref: /schemas/types.yaml#/definitions/cell
+ - const: 0x84
+
+ else:
+ properties:
+ reg:
+ items:
+ - items:
+ - $ref: /schemas/types.yaml#/definitions/cell
+ - enum:
+ - 0x4
+ - 0x10
+
unevaluatedProperties: false
examples:
- |
mdio@d0072004 {
compatible = "marvell,orion-mdio";
- reg = <0xd0072004 0x4>;
+ reg = <0xd0072004 0x84>;
#address-cells = <1>;
#size-cells = <0>;
interrupts = <30>;
diff --git a/Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb2-phy.yaml b/Documentation/devicetree/bindings/phy/amlogic,g12a-usb2-phy.yaml
index f3a5fbabbbb5..bb01c6b34dab 100644
--- a/Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb2-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/amlogic,g12a-usb2-phy.yaml
@@ -2,7 +2,7 @@
# Copyright 2019 BayLibre, SAS
%YAML 1.2
---
-$id: "http://devicetree.org/schemas/phy/amlogic,meson-g12a-usb2-phy.yaml#"
+$id: "http://devicetree.org/schemas/phy/amlogic,g12a-usb2-phy.yaml#"
$schema: "http://devicetree.org/meta-schemas/core.yaml#"
title: Amlogic G12A USB2 PHY
@@ -13,8 +13,8 @@ maintainers:
properties:
compatible:
enum:
- - amlogic,meson-g12a-usb2-phy
- - amlogic,meson-a1-usb2-phy
+ - amlogic,g12a-usb2-phy
+ - amlogic,a1-usb2-phy
reg:
maxItems: 1
@@ -68,7 +68,7 @@ additionalProperties: false
examples:
- |
phy@36000 {
- compatible = "amlogic,meson-g12a-usb2-phy";
+ compatible = "amlogic,g12a-usb2-phy";
reg = <0x36000 0x2000>;
clocks = <&xtal>;
clock-names = "xtal";
diff --git a/Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb3-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/amlogic,g12a-usb3-pcie-phy.yaml
index 868b4e6fde71..129d26e99776 100644
--- a/Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb3-pcie-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/amlogic,g12a-usb3-pcie-phy.yaml
@@ -2,7 +2,7 @@
# Copyright 2019 BayLibre, SAS
%YAML 1.2
---
-$id: "http://devicetree.org/schemas/phy/amlogic,meson-g12a-usb3-pcie-phy.yaml#"
+$id: "http://devicetree.org/schemas/phy/amlogic,g12a-usb3-pcie-phy.yaml#"
$schema: "http://devicetree.org/meta-schemas/core.yaml#"
title: Amlogic G12A USB3 + PCIE Combo PHY
@@ -13,7 +13,7 @@ maintainers:
properties:
compatible:
enum:
- - amlogic,meson-g12a-usb3-pcie-phy
+ - amlogic,g12a-usb3-pcie-phy
reg:
maxItems: 1
@@ -49,7 +49,7 @@ additionalProperties: false
examples:
- |
phy@46000 {
- compatible = "amlogic,meson-g12a-usb3-pcie-phy";
+ compatible = "amlogic,g12a-usb3-pcie-phy";
reg = <0x46000 0x2000>;
clocks = <&ref_clk>;
clock-names = "ref_clk";
diff --git a/Documentation/devicetree/bindings/phy/qcom,usb-hs-28nm.yaml b/Documentation/devicetree/bindings/phy/qcom,usb-hs-28nm.yaml
index abcc4373f39e..ca6a0836b53c 100644
--- a/Documentation/devicetree/bindings/phy/qcom,usb-hs-28nm.yaml
+++ b/Documentation/devicetree/bindings/phy/qcom,usb-hs-28nm.yaml
@@ -16,7 +16,6 @@ properties:
compatible:
enum:
- qcom,usb-hs-28nm-femtophy
- - qcom,usb-hs-28nm-mdm9607
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/regulator/samsung,s2mps14.yaml b/Documentation/devicetree/bindings/regulator/samsung,s2mps14.yaml
index 01f9d4e236e9..a7feb497eb89 100644
--- a/Documentation/devicetree/bindings/regulator/samsung,s2mps14.yaml
+++ b/Documentation/devicetree/bindings/regulator/samsung,s2mps14.yaml
@@ -19,8 +19,8 @@ description: |
additional information and example.
patternProperties:
- # 25 LDOs
- "^LDO([1-9]|[1][0-9]|2[0-5])$":
+ # 25 LDOs, without LDO10-12
+ "^LDO([1-9]|1[3-9]|2[0-5])$":
type: object
$ref: regulator.yaml#
unevaluatedProperties: false
@@ -30,6 +30,23 @@ patternProperties:
required:
- regulator-name
+ "^LDO(1[0-2])$":
+ type: object
+ $ref: regulator.yaml#
+ unevaluatedProperties: false
+ description:
+ Properties for single LDO regulator.
+
+ properties:
+ samsung,ext-control-gpios:
+ maxItems: 1
+ description:
+ LDO10, LDO11 and LDO12 can be configured to external control over
+ GPIO.
+
+ required:
+ - regulator-name
+
# 5 bucks
"^BUCK[1-5]$":
type: object
diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml
index c6720764e765..a2884e3113da 100644
--- a/Documentation/devicetree/bindings/riscv/cpus.yaml
+++ b/Documentation/devicetree/bindings/riscv/cpus.yaml
@@ -83,7 +83,7 @@ properties:
insensitive, letters in the riscv,isa string must be all
lowercase to simplify parsing.
$ref: "/schemas/types.yaml#/definitions/string"
- pattern: ^rv(?:64|32)imaf?d?q?c?b?v?k?h?(?:_[hsxz](?:[a-z])+)*$
+ pattern: ^rv(?:64|32)imaf?d?q?c?b?k?j?p?v?h?(?:[hsxz](?:[a-z])+)?(?:_[hsxz](?:[a-z])+)*$
# RISC-V requires 'timebase-frequency' in /cpus, so disallow it here
timebase-frequency: false
diff --git a/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml b/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml
index 0a7aa29563c1..21c8ea08ff0a 100644
--- a/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml
+++ b/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml
@@ -40,6 +40,8 @@ properties:
description:
Indicates that the setting of RTC time is allowed by the host CPU.
+ wakeup-source: true
+
required:
- compatible
- reg
diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,apr-services.yaml b/Documentation/devicetree/bindings/soc/qcom/qcom,apr-services.yaml
index 290555426c39..bdf482db32aa 100644
--- a/Documentation/devicetree/bindings/soc/qcom/qcom,apr-services.yaml
+++ b/Documentation/devicetree/bindings/soc/qcom/qcom,apr-services.yaml
@@ -39,8 +39,8 @@ properties:
qcom,protection-domain:
$ref: /schemas/types.yaml#/definitions/string-array
description: |
- Protection domain service name and path for APR service
- possible values are::
+ Protection domain service name and path for APR service (if supported).
+ Possible values are::
"avs/audio", "msm/adsp/audio_pd".
"kernel/elf_loader", "msm/modem/wlan_pd".
"tms/servreg", "msm/adsp/audio_pd".
@@ -49,6 +49,5 @@ properties:
required:
- reg
- - qcom,protection-domain
additionalProperties: true
diff --git a/Documentation/devicetree/bindings/sound/everest,es8326.yaml b/Documentation/devicetree/bindings/sound/everest,es8326.yaml
index 07781408e788..07781408e788 100755..100644
--- a/Documentation/devicetree/bindings/sound/everest,es8326.yaml
+++ b/Documentation/devicetree/bindings/sound/everest,es8326.yaml
diff --git a/Documentation/devicetree/bindings/sound/mt8186-mt6366-rt1019-rt5682s.yaml b/Documentation/devicetree/bindings/sound/mt8186-mt6366-rt1019-rt5682s.yaml
index 9d3139990237..aa23b0024c46 100644
--- a/Documentation/devicetree/bindings/sound/mt8186-mt6366-rt1019-rt5682s.yaml
+++ b/Documentation/devicetree/bindings/sound/mt8186-mt6366-rt1019-rt5682s.yaml
@@ -16,6 +16,7 @@ properties:
compatible:
enum:
- mediatek,mt8186-mt6366-rt1019-rt5682s-sound
+ - mediatek,mt8186-mt6366-rt5682s-max98360-sound
mediatek,platform:
$ref: "/schemas/types.yaml#/definitions/phandle"
diff --git a/Documentation/devicetree/bindings/sound/qcom,lpass-tx-macro.yaml b/Documentation/devicetree/bindings/sound/qcom,lpass-tx-macro.yaml
index 66431aade3b7..da5f70910da5 100644
--- a/Documentation/devicetree/bindings/sound/qcom,lpass-tx-macro.yaml
+++ b/Documentation/devicetree/bindings/sound/qcom,lpass-tx-macro.yaml
@@ -30,7 +30,9 @@ properties:
const: 0
clocks:
- maxItems: 5
+ oneOf:
+ - maxItems: 3
+ - maxItems: 5
clock-names:
oneOf:
diff --git a/Documentation/devicetree/bindings/sound/qcom,lpass-wsa-macro.yaml b/Documentation/devicetree/bindings/sound/qcom,lpass-wsa-macro.yaml
index 2bf8d082f8f1..66cbb1f5e31a 100644
--- a/Documentation/devicetree/bindings/sound/qcom,lpass-wsa-macro.yaml
+++ b/Documentation/devicetree/bindings/sound/qcom,lpass-wsa-macro.yaml
@@ -9,9 +9,6 @@ title: LPASS(Low Power Audio Subsystem) VA Macro audio codec
maintainers:
- Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
-allOf:
- - $ref: dai-common.yaml#
-
properties:
compatible:
enum:
@@ -30,15 +27,12 @@ properties:
const: 0
clocks:
- maxItems: 5
+ minItems: 5
+ maxItems: 6
clock-names:
- items:
- - const: mclk
- - const: npl
- - const: macro
- - const: dcodec
- - const: fsgen
+ minItems: 5
+ maxItems: 6
clock-output-names:
maxItems: 1
@@ -55,10 +49,51 @@ required:
- reg
- "#sound-dai-cells"
+allOf:
+ - $ref: dai-common.yaml#
+
+ - if:
+ properties:
+ compatible:
+ enum:
+ - qcom,sc7280-lpass-wsa-macro
+ - qcom,sm8450-lpass-wsa-macro
+ - qcom,sc8280xp-lpass-wsa-macro
+ then:
+ properties:
+ clocks:
+ maxItems: 5
+ clock-names:
+ items:
+ - const: mclk
+ - const: npl
+ - const: macro
+ - const: dcodec
+ - const: fsgen
+
+ - if:
+ properties:
+ compatible:
+ enum:
+ - qcom,sm8250-lpass-wsa-macro
+ then:
+ properties:
+ clocks:
+ minItems: 6
+ clock-names:
+ items:
+ - const: mclk
+ - const: npl
+ - const: macro
+ - const: dcodec
+ - const: va
+ - const: fsgen
+
unevaluatedProperties: false
examples:
- |
+ #include <dt-bindings/clock/qcom,sm8250-lpass-aoncc.h>
#include <dt-bindings/sound/qcom,q6afe.h>
codec@3240000 {
compatible = "qcom,sm8250-lpass-wsa-macro";
@@ -69,7 +104,8 @@ examples:
<&audiocc 0>,
<&q6afecc LPASS_HW_MACRO_VOTE LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
<&q6afecc LPASS_HW_DCODEC_VOTE LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
+ <&aoncc LPASS_CDC_VA_MCLK>,
<&vamacro>;
- clock-names = "mclk", "npl", "macro", "dcodec", "fsgen";
+ clock-names = "mclk", "npl", "macro", "dcodec", "va", "fsgen";
clock-output-names = "mclk";
};
diff --git a/Documentation/devicetree/bindings/soundwire/qcom,soundwire.yaml b/Documentation/devicetree/bindings/soundwire/qcom,soundwire.yaml
index bcbfa71536cd..3efdc192ab01 100644
--- a/Documentation/devicetree/bindings/soundwire/qcom,soundwire.yaml
+++ b/Documentation/devicetree/bindings/soundwire/qcom,soundwire.yaml
@@ -80,7 +80,7 @@ properties:
or applicable for the respective data port.
More info in MIPI Alliance SoundWire 1.0 Specifications.
minItems: 3
- maxItems: 5
+ maxItems: 8
qcom,ports-sinterval-low:
$ref: /schemas/types.yaml#/definitions/uint8-array
@@ -124,7 +124,7 @@ properties:
or applicable for the respective data port.
More info in MIPI Alliance SoundWire 1.0 Specifications.
minItems: 3
- maxItems: 5
+ maxItems: 8
qcom,ports-block-pack-mode:
$ref: /schemas/types.yaml#/definitions/uint8-array
@@ -154,7 +154,7 @@ properties:
or applicable for the respective data port.
More info in MIPI Alliance SoundWire 1.0 Specifications.
minItems: 3
- maxItems: 5
+ maxItems: 8
items:
oneOf:
- minimum: 0
@@ -171,7 +171,7 @@ properties:
or applicable for the respective data port.
More info in MIPI Alliance SoundWire 1.0 Specifications.
minItems: 3
- maxItems: 5
+ maxItems: 8
items:
oneOf:
- minimum: 0
@@ -187,7 +187,7 @@ properties:
or applicable for the respective data port.
More info in MIPI Alliance SoundWire 1.0 Specifications.
minItems: 3
- maxItems: 5
+ maxItems: 8
items:
oneOf:
- minimum: 0
diff --git a/Documentation/devicetree/bindings/spi/atmel,at91rm9200-spi.yaml b/Documentation/devicetree/bindings/spi/atmel,at91rm9200-spi.yaml
index 4dd973e341e6..6c57dd6c3a36 100644
--- a/Documentation/devicetree/bindings/spi/atmel,at91rm9200-spi.yaml
+++ b/Documentation/devicetree/bindings/spi/atmel,at91rm9200-spi.yaml
@@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Atmel SPI device
maintainers:
- - Tudor Ambarus <tudor.ambarus@microchip.com>
+ - Tudor Ambarus <tudor.ambarus@linaro.org>
allOf:
- $ref: spi-controller.yaml#
diff --git a/Documentation/devicetree/bindings/spi/atmel,quadspi.yaml b/Documentation/devicetree/bindings/spi/atmel,quadspi.yaml
index 1d493add4053..b0d99bc10535 100644
--- a/Documentation/devicetree/bindings/spi/atmel,quadspi.yaml
+++ b/Documentation/devicetree/bindings/spi/atmel,quadspi.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Atmel Quad Serial Peripheral Interface (QSPI)
maintainers:
- - Tudor Ambarus <tudor.ambarus@microchip.com>
+ - Tudor Ambarus <tudor.ambarus@linaro.org>
allOf:
- $ref: spi-controller.yaml#
diff --git a/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml b/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml
index ead2cccf658f..9a60c0664bbe 100644
--- a/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml
@@ -44,9 +44,9 @@ properties:
description:
Maximum SPI clocking speed of the device in Hz.
- spi-cs-setup-ns:
+ spi-cs-setup-delay-ns:
description:
- Delay in nanosecods to be introduced by the controller after CS is
+ Delay in nanoseconds to be introduced by the controller after CS is
asserted.
spi-rx-bus-width:
diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst
index 067fd1670b1f..a43aacf1494e 100644
--- a/Documentation/filesystems/erofs.rst
+++ b/Documentation/filesystems/erofs.rst
@@ -120,6 +120,8 @@ dax={always,never} Use direct access (no page cache). See
dax A legacy option which is an alias for ``dax=always``.
device=%s Specify a path to an extra device to be used together.
fsid=%s Specify a filesystem image ID for Fscache back-end.
+domain_id=%s Specify a domain ID in fscache mode so that different images
+ with the same blobs under a given domain ID can share storage.
=================== =========================================================
Sysfs Entries
diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst
index ef183387da20..eccd327e6df5 100644
--- a/Documentation/filesystems/fscrypt.rst
+++ b/Documentation/filesystems/fscrypt.rst
@@ -1277,8 +1277,8 @@ the file contents themselves, as described below:
For the read path (->read_folio()) of regular files, filesystems can
read the ciphertext into the page cache and decrypt it in-place. The
-page lock must be held until decryption has finished, to prevent the
-page from becoming visible to userspace prematurely.
+folio lock must be held until decryption has finished, to prevent the
+folio from becoming visible to userspace prematurely.
For the write path (->writepage()) of regular files, filesystems
cannot encrypt data in-place in the page cache, since the cached
diff --git a/Documentation/filesystems/fsverity.rst b/Documentation/filesystems/fsverity.rst
index cb8e7573882a..ede672dedf11 100644
--- a/Documentation/filesystems/fsverity.rst
+++ b/Documentation/filesystems/fsverity.rst
@@ -118,10 +118,11 @@ as follows:
- ``hash_algorithm`` must be the identifier for the hash algorithm to
use for the Merkle tree, such as FS_VERITY_HASH_ALG_SHA256. See
``include/uapi/linux/fsverity.h`` for the list of possible values.
-- ``block_size`` must be the Merkle tree block size. Currently, this
- must be equal to the system page size, which is usually 4096 bytes.
- Other sizes may be supported in the future. This value is not
- necessarily the same as the filesystem block size.
+- ``block_size`` is the Merkle tree block size, in bytes. In Linux
+ v6.3 and later, this can be any power of 2 between (inclusively)
+ 1024 and the minimum of the system page size and the filesystem
+ block size. In earlier versions, the page size was the only allowed
+ value.
- ``salt_size`` is the size of the salt in bytes, or 0 if no salt is
provided. The salt is a value that is prepended to every hashed
block; it can be used to personalize the hashing for a particular
@@ -161,6 +162,7 @@ FS_IOC_ENABLE_VERITY can fail with the following errors:
- ``EBUSY``: this ioctl is already running on the file
- ``EEXIST``: the file already has verity enabled
- ``EFAULT``: the caller provided inaccessible memory
+- ``EFBIG``: the file is too large to enable verity on
- ``EINTR``: the operation was interrupted by a fatal signal
- ``EINVAL``: unsupported version, hash algorithm, or block size; or
reserved bits are set; or the file descriptor refers to neither a
@@ -495,9 +497,11 @@ To create verity files on an ext4 filesystem, the filesystem must have
been formatted with ``-O verity`` or had ``tune2fs -O verity`` run on
it. "verity" is an RO_COMPAT filesystem feature, so once set, old
kernels will only be able to mount the filesystem readonly, and old
-versions of e2fsck will be unable to check the filesystem. Moreover,
-currently ext4 only supports mounting a filesystem with the "verity"
-feature when its block size is equal to PAGE_SIZE (often 4096 bytes).
+versions of e2fsck will be unable to check the filesystem.
+
+Originally, an ext4 filesystem with the "verity" feature could only be
+mounted when its block size was equal to the system page size
+(typically 4096 bytes). In Linux v6.3, this limitation was removed.
ext4 sets the EXT4_VERITY_FL on-disk inode flag on verity files. It
can only be set by `FS_IOC_ENABLE_VERITY`_, and it cannot be cleared.
@@ -518,9 +522,7 @@ support paging multi-gigabyte xattrs into memory, and to support
encrypting xattrs. Note that the verity metadata *must* be encrypted
when the file is, since it contains hashes of the plaintext data.
-Currently, ext4 verity only supports the case where the Merkle tree
-block size, filesystem block size, and page size are all the same. It
-also only supports extent-based files.
+ext4 only allows verity on extent-based files.
f2fs
----
@@ -538,11 +540,10 @@ Like ext4, f2fs stores the verity metadata (Merkle tree and
fsverity_descriptor) past the end of the file, starting at the first
64K boundary beyond i_size. See explanation for ext4 above.
Moreover, f2fs supports at most 4096 bytes of xattr entries per inode
-which wouldn't be enough for even a single Merkle tree block.
+which usually wouldn't be enough for even a single Merkle tree block.
-Currently, f2fs verity only supports a Merkle tree block size of 4096.
-Also, f2fs doesn't support enabling verity on files that currently
-have atomic or volatile writes pending.
+f2fs doesn't support enabling verity on files that currently have
+atomic or volatile writes pending.
btrfs
-----
@@ -567,51 +568,48 @@ Pagecache
~~~~~~~~~
For filesystems using Linux's pagecache, the ``->read_folio()`` and
-``->readahead()`` methods must be modified to verify pages before they
-are marked Uptodate. Merely hooking ``->read_iter()`` would be
+``->readahead()`` methods must be modified to verify folios before
+they are marked Uptodate. Merely hooking ``->read_iter()`` would be
insufficient, since ``->read_iter()`` is not used for memory maps.
-Therefore, fs/verity/ provides a function fsverity_verify_page() which
-verifies a page that has been read into the pagecache of a verity
-inode, but is still locked and not Uptodate, so it's not yet readable
-by userspace. As needed to do the verification,
-fsverity_verify_page() will call back into the filesystem to read
-Merkle tree pages via fsverity_operations::read_merkle_tree_page().
+Therefore, fs/verity/ provides the function fsverity_verify_blocks()
+which verifies data that has been read into the pagecache of a verity
+inode. The containing folio must still be locked and not Uptodate, so
+it's not yet readable by userspace. As needed to do the verification,
+fsverity_verify_blocks() will call back into the filesystem to read
+hash blocks via fsverity_operations::read_merkle_tree_page().
-fsverity_verify_page() returns false if verification failed; in this
-case, the filesystem must not set the page Uptodate. Following this,
+fsverity_verify_blocks() returns false if verification failed; in this
+case, the filesystem must not set the folio Uptodate. Following this,
as per the usual Linux pagecache behavior, attempts by userspace to
-read() from the part of the file containing the page will fail with
-EIO, and accesses to the page within a memory map will raise SIGBUS.
-
-fsverity_verify_page() currently only supports the case where the
-Merkle tree block size is equal to PAGE_SIZE (often 4096 bytes).
+read() from the part of the file containing the folio will fail with
+EIO, and accesses to the folio within a memory map will raise SIGBUS.
-In principle, fsverity_verify_page() verifies the entire path in the
-Merkle tree from the data page to the root hash. However, for
-efficiency the filesystem may cache the hash pages. Therefore,
-fsverity_verify_page() only ascends the tree reading hash pages until
-an already-verified hash page is seen, as indicated by the PageChecked
-bit being set. It then verifies the path to that page.
+In principle, verifying a data block requires verifying the entire
+path in the Merkle tree from the data block to the root hash.
+However, for efficiency the filesystem may cache the hash blocks.
+Therefore, fsverity_verify_blocks() only ascends the tree reading hash
+blocks until an already-verified hash block is seen. It then verifies
+the path to that block.
This optimization, which is also used by dm-verity, results in
excellent sequential read performance. This is because usually (e.g.
-127 in 128 times for 4K blocks and SHA-256) the hash page from the
+127 in 128 times for 4K blocks and SHA-256) the hash block from the
bottom level of the tree will already be cached and checked from
-reading a previous data page. However, random reads perform worse.
+reading a previous data block. However, random reads perform worse.
Block device based filesystems
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Block device based filesystems (e.g. ext4 and f2fs) in Linux also use
the pagecache, so the above subsection applies too. However, they
-also usually read many pages from a file at once, grouped into a
+also usually read many data blocks from a file at once, grouped into a
structure called a "bio". To make it easier for these types of
filesystems to support fs-verity, fs/verity/ also provides a function
-fsverity_verify_bio() which verifies all pages in a bio.
+fsverity_verify_bio() which verifies all data blocks in a bio.
ext4 and f2fs also support encryption. If a verity file is also
-encrypted, the pages must be decrypted before being verified. To
+encrypted, the data must be decrypted before being verified. To
support this, these filesystems allocate a "post-read context" for
each bio and store it in ``->bi_private``::
@@ -626,14 +624,14 @@ each bio and store it in ``->bi_private``::
verity, or both is enabled. After the bio completes, for each needed
postprocessing step the filesystem enqueues the bio_post_read_ctx on a
workqueue, and then the workqueue work does the decryption or
-verification. Finally, pages where no decryption or verity error
-occurred are marked Uptodate, and the pages are unlocked.
+verification. Finally, folios where no decryption or verity error
+occurred are marked Uptodate, and the folios are unlocked.
On many filesystems, files can contain holes. Normally,
-``->readahead()`` simply zeroes holes and sets the corresponding pages
-Uptodate; no bios are issued. To prevent this case from bypassing
-fs-verity, these filesystems use fsverity_verify_page() to verify hole
-pages.
+``->readahead()`` simply zeroes hole blocks and considers the
+corresponding data to be up-to-date; no bios are issued. To prevent
+this case from bypassing fs-verity, filesystems use
+fsverity_verify_blocks() to verify hole blocks.
Filesystems also disable direct I/O on verity files, since otherwise
direct I/O would bypass fs-verity.
@@ -644,7 +642,7 @@ Userspace utility
This document focuses on the kernel, but a userspace utility for
fs-verity can be found at:
- https://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/fsverity-utils.git
+ https://git.kernel.org/pub/scm/fs/fsverity/fsverity-utils.git
See the README.md file in the fsverity-utils source tree for details,
including examples of setting up fs-verity protected files.
@@ -793,9 +791,9 @@ weren't already directly answered in other parts of this document.
:A: There are many reasons why this is not possible or would be very
difficult, including the following:
- - To prevent bypassing verification, pages must not be marked
+ - To prevent bypassing verification, folios must not be marked
Uptodate until they've been verified. Currently, each
- filesystem is responsible for marking pages Uptodate via
+ filesystem is responsible for marking folios Uptodate via
``->readahead()``. Therefore, currently it's not possible for
the VFS to do the verification on its own. Changing this would
require significant changes to the VFS and all filesystems.
diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index 36fa2a83d714..7de7a7272a5e 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -56,35 +56,35 @@ inode_operations
prototypes::
- int (*create) (struct inode *,struct dentry *,umode_t, bool);
+ int (*create) (struct mnt_idmap *, struct inode *,struct dentry *,umode_t, bool);
struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
int (*link) (struct dentry *,struct inode *,struct dentry *);
int (*unlink) (struct inode *,struct dentry *);
- int (*symlink) (struct inode *,struct dentry *,const char *);
- int (*mkdir) (struct inode *,struct dentry *,umode_t);
+ int (*symlink) (struct mnt_idmap *, struct inode *,struct dentry *,const char *);
+ int (*mkdir) (struct mnt_idmap *, struct inode *,struct dentry *,umode_t);
int (*rmdir) (struct inode *,struct dentry *);
- int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
- int (*rename) (struct inode *, struct dentry *,
+ int (*mknod) (struct mnt_idmap *, struct inode *,struct dentry *,umode_t,dev_t);
+ int (*rename) (struct mnt_idmap *, struct inode *, struct dentry *,
struct inode *, struct dentry *, unsigned int);
int (*readlink) (struct dentry *, char __user *,int);
const char *(*get_link) (struct dentry *, struct inode *, struct delayed_call *);
void (*truncate) (struct inode *);
- int (*permission) (struct inode *, int, unsigned int);
+ int (*permission) (struct mnt_idmap *, struct inode *, int, unsigned int);
struct posix_acl * (*get_inode_acl)(struct inode *, int, bool);
- int (*setattr) (struct dentry *, struct iattr *);
- int (*getattr) (const struct path *, struct kstat *, u32, unsigned int);
+ int (*setattr) (struct mnt_idmap *, struct dentry *, struct iattr *);
+ int (*getattr) (struct mnt_idmap *, const struct path *, struct kstat *, u32, unsigned int);
ssize_t (*listxattr) (struct dentry *, char *, size_t);
int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len);
void (*update_time)(struct inode *, struct timespec *, int);
int (*atomic_open)(struct inode *, struct dentry *,
struct file *, unsigned open_flag,
umode_t create_mode);
- int (*tmpfile) (struct user_namespace *, struct inode *,
+ int (*tmpfile) (struct mnt_idmap *, struct inode *,
struct file *, umode_t);
- int (*fileattr_set)(struct user_namespace *mnt_userns,
+ int (*fileattr_set)(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa);
int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
- struct posix_acl * (*get_acl)(struct user_namespace *, struct dentry *, int);
+ struct posix_acl * (*get_acl)(struct mnt_idmap *, struct dentry *, int);
locking rules:
all may block
@@ -135,7 +135,7 @@ prototypes::
struct inode *inode, const char *name, void *buffer,
size_t size);
int (*set)(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *dentry, struct inode *inode, const char *name,
const void *buffer, size_t size, int flags);
diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
index 2c15e7053113..c53f30251a66 100644
--- a/Documentation/filesystems/vfs.rst
+++ b/Documentation/filesystems/vfs.rst
@@ -421,31 +421,31 @@ As of kernel 2.6.22, the following members are defined:
.. code-block:: c
struct inode_operations {
- int (*create) (struct user_namespace *, struct inode *,struct dentry *, umode_t, bool);
+ int (*create) (struct mnt_idmap *, struct inode *,struct dentry *, umode_t, bool);
struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
int (*link) (struct dentry *,struct inode *,struct dentry *);
int (*unlink) (struct inode *,struct dentry *);
- int (*symlink) (struct user_namespace *, struct inode *,struct dentry *,const char *);
- int (*mkdir) (struct user_namespace *, struct inode *,struct dentry *,umode_t);
+ int (*symlink) (struct mnt_idmap *, struct inode *,struct dentry *,const char *);
+ int (*mkdir) (struct mnt_idmap *, struct inode *,struct dentry *,umode_t);
int (*rmdir) (struct inode *,struct dentry *);
- int (*mknod) (struct user_namespace *, struct inode *,struct dentry *,umode_t,dev_t);
- int (*rename) (struct user_namespace *, struct inode *, struct dentry *,
+ int (*mknod) (struct mnt_idmap *, struct inode *,struct dentry *,umode_t,dev_t);
+ int (*rename) (struct mnt_idmap *, struct inode *, struct dentry *,
struct inode *, struct dentry *, unsigned int);
int (*readlink) (struct dentry *, char __user *,int);
const char *(*get_link) (struct dentry *, struct inode *,
struct delayed_call *);
- int (*permission) (struct user_namespace *, struct inode *, int);
+ int (*permission) (struct mnt_idmap *, struct inode *, int);
struct posix_acl * (*get_inode_acl)(struct inode *, int, bool);
- int (*setattr) (struct user_namespace *, struct dentry *, struct iattr *);
- int (*getattr) (struct user_namespace *, const struct path *, struct kstat *, u32, unsigned int);
+ int (*setattr) (struct mnt_idmap *, struct dentry *, struct iattr *);
+ int (*getattr) (struct mnt_idmap *, const struct path *, struct kstat *, u32, unsigned int);
ssize_t (*listxattr) (struct dentry *, char *, size_t);
void (*update_time)(struct inode *, struct timespec *, int);
int (*atomic_open)(struct inode *, struct dentry *, struct file *,
unsigned open_flag, umode_t create_mode);
- int (*tmpfile) (struct user_namespace *, struct inode *, struct file *, umode_t);
- struct posix_acl * (*get_acl)(struct user_namespace *, struct dentry *, int);
- int (*set_acl)(struct user_namespace *, struct dentry *, struct posix_acl *, int);
- int (*fileattr_set)(struct user_namespace *mnt_userns,
+ int (*tmpfile) (struct mnt_idmap *, struct inode *, struct file *, umode_t);
+ struct posix_acl * (*get_acl)(struct mnt_idmap *, struct dentry *, int);
+ int (*set_acl)(struct mnt_idmap *, struct dentry *, struct posix_acl *, int);
+ int (*fileattr_set)(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa);
int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
};
diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst
index 6b7368d1f516..38bc74eaa547 100644
--- a/Documentation/kbuild/makefiles.rst
+++ b/Documentation/kbuild/makefiles.rst
@@ -1042,7 +1042,7 @@ $(clean-files).
When executing "make clean", the file "crc32table.h" will be deleted.
Kbuild will assume files to be in the same relative directory as the
-Makefile, except if prefixed with $(objtree).
+Makefile.
To exclude certain files or directories from make clean, use the
$(no-clean-files) variable.
diff --git a/Documentation/networking/bridge.rst b/Documentation/networking/bridge.rst
index 4aef9cddde2f..c859f3c1636e 100644
--- a/Documentation/networking/bridge.rst
+++ b/Documentation/networking/bridge.rst
@@ -8,7 +8,7 @@ In order to use the Ethernet bridging functionality, you'll need the
userspace tools.
Documentation for Linux bridging is on:
- http://www.linuxfoundation.org/collaborate/workgroups/networking/bridge
+ https://wiki.linuxfoundation.org/networking/bridge
The bridge-utilities are maintained at:
git://git.kernel.org/pub/scm/linux/kernel/git/shemminger/bridge-utils.git
diff --git a/Documentation/networking/device_drivers/ethernet/intel/ice.rst b/Documentation/networking/device_drivers/ethernet/intel/ice.rst
index dc2e60ced927..b481b81f3be5 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/ice.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/ice.rst
@@ -819,7 +819,7 @@ NAPI
----
This driver supports NAPI (Rx polling mode).
For more information on NAPI, see
-https://www.linuxfoundation.org/collaborate/workgroups/networking/napi
+https://wiki.linuxfoundation.org/networking/napi
MACVLAN
diff --git a/Documentation/networking/device_drivers/ethernet/wangxun/txgbe.rst b/Documentation/networking/device_drivers/ethernet/wangxun/txgbe.rst
index eaa87dbe8848..d052ef40fe36 100644
--- a/Documentation/networking/device_drivers/ethernet/wangxun/txgbe.rst
+++ b/Documentation/networking/device_drivers/ethernet/wangxun/txgbe.rst
@@ -16,5 +16,5 @@ Contents
Support
=======
-If you got any problem, contact Wangxun support team via support@trustnetic.com
+If you got any problem, contact Wangxun support team via nic-support@net-swift.com
and Cc: netdev.
diff --git a/Documentation/networking/nf_conntrack-sysctl.rst b/Documentation/networking/nf_conntrack-sysctl.rst
index 49db1d11d7c4..8b1045c3b59e 100644
--- a/Documentation/networking/nf_conntrack-sysctl.rst
+++ b/Documentation/networking/nf_conntrack-sysctl.rst
@@ -173,7 +173,9 @@ nf_conntrack_sctp_timeout_cookie_echoed - INTEGER (seconds)
default 3
nf_conntrack_sctp_timeout_established - INTEGER (seconds)
- default 432000 (5 days)
+ default 210
+
+ Default is set to (hb_interval * path_max_retrans + rto_max)
nf_conntrack_sctp_timeout_shutdown_sent - INTEGER (seconds)
default 0.3
@@ -190,12 +192,6 @@ nf_conntrack_sctp_timeout_heartbeat_sent - INTEGER (seconds)
This timeout is used to setup conntrack entry on secondary paths.
Default is set to hb_interval.
-nf_conntrack_sctp_timeout_heartbeat_acked - INTEGER (seconds)
- default 210
-
- This timeout is used to setup conntrack entry on secondary paths.
- Default is set to (hb_interval * path_max_retrans + rto_max)
-
nf_conntrack_udp_timeout - INTEGER (seconds)
default 30
diff --git a/Documentation/networking/rxrpc.rst b/Documentation/networking/rxrpc.rst
index 39494a6ea739..e1af54424192 100644
--- a/Documentation/networking/rxrpc.rst
+++ b/Documentation/networking/rxrpc.rst
@@ -880,8 +880,8 @@ The kernel interface functions are as follows:
notify_end_rx can be NULL or it can be used to specify a function to be
called when the call changes state to end the Tx phase. This function is
- called with the call-state spinlock held to prevent any reply or final ACK
- from being delivered first.
+ called with a spinlock held to prevent the last DATA packet from being
+ transmitted until the function returns.
(#) Receive data from a call::
diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst
index 1fa5ab8754d3..4a75686d35ab 100644
--- a/Documentation/process/maintainer-netdev.rst
+++ b/Documentation/process/maintainer-netdev.rst
@@ -2,9 +2,9 @@
.. _netdev-FAQ:
-==========
-netdev FAQ
-==========
+=============================
+Networking subsystem (netdev)
+=============================
tl;dr
-----
@@ -15,14 +15,15 @@ tl;dr
- don't repost your patches within one 24h period
- reverse xmas tree
-What is netdev?
----------------
-It is a mailing list for all network-related Linux stuff. This
+netdev
+------
+
+netdev is a mailing list for all network-related Linux stuff. This
includes anything found under net/ (i.e. core code like IPv6) and
drivers/net (i.e. hardware specific drivers) in the Linux source tree.
Note that some subsystems (e.g. wireless drivers) which have a high
-volume of traffic have their own specific mailing lists.
+volume of traffic have their own specific mailing lists and trees.
The netdev list is managed (like many other Linux mailing lists) through
VGER (http://vger.kernel.org/) with archives available at
@@ -32,32 +33,10 @@ Aside from subsystems like those mentioned above, all network-related
Linux development (i.e. RFC, review, comments, etc.) takes place on
netdev.
-How do the changes posted to netdev make their way into Linux?
---------------------------------------------------------------
-There are always two trees (git repositories) in play. Both are
-driven by David Miller, the main network maintainer. There is the
-``net`` tree, and the ``net-next`` tree. As you can probably guess from
-the names, the ``net`` tree is for fixes to existing code already in the
-mainline tree from Linus, and ``net-next`` is where the new code goes
-for the future release. You can find the trees here:
-
-- https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
-- https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
-
-How do I indicate which tree (net vs. net-next) my patch should be in?
-----------------------------------------------------------------------
-To help maintainers and CI bots you should explicitly mark which tree
-your patch is targeting. Assuming that you use git, use the prefix
-flag::
-
- git format-patch --subject-prefix='PATCH net-next' start..finish
+Development cycle
+-----------------
-Use ``net`` instead of ``net-next`` (always lower case) in the above for
-bug-fix ``net`` content.
-
-How often do changes from these trees make it to the mainline Linus tree?
--------------------------------------------------------------------------
-To understand this, you need to know a bit of background information on
+Here is a bit of background information on
the cadence of Linux development. Each new release starts off with a
two week "merge window" where the main maintainers feed their new stuff
to Linus for merging into the mainline tree. After the two weeks, the
@@ -69,9 +48,33 @@ rc2 is released. This repeats on a roughly weekly basis until rc7
state of churn), and a week after the last vX.Y-rcN was done, the
official vX.Y is released.
-Relating that to netdev: At the beginning of the 2-week merge window,
-the ``net-next`` tree will be closed - no new changes/features. The
-accumulated new content of the past ~10 weeks will be passed onto
+To find out where we are now in the cycle - load the mainline (Linus)
+page here:
+
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
+
+and note the top of the "tags" section. If it is rc1, it is early in
+the dev cycle. If it was tagged rc7 a week ago, then a release is
+probably imminent. If the most recent tag is a final release tag
+(without an ``-rcN`` suffix) - we are most likely in a merge window
+and ``net-next`` is closed.
+
+git trees and patch flow
+------------------------
+
+There are two networking trees (git repositories) in play. Both are
+driven by David Miller, the main network maintainer. There is the
+``net`` tree, and the ``net-next`` tree. As you can probably guess from
+the names, the ``net`` tree is for fixes to existing code already in the
+mainline tree from Linus, and ``net-next`` is where the new code goes
+for the future release. You can find the trees here:
+
+- https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
+- https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
+
+Relating that to kernel development: At the beginning of the 2-week
+merge window, the ``net-next`` tree will be closed - no new changes/features.
+The accumulated new content of the past ~10 weeks will be passed onto
mainline/Linus via a pull request for vX.Y -- at the same time, the
``net`` tree will start accumulating fixes for this pulled content
relating to vX.Y
@@ -103,22 +106,14 @@ focus for ``net`` is on stabilization and bug fixes.
Finally, the vX.Y gets released, and the whole cycle starts over.
-So where are we now in this cycle?
-----------------------------------
+netdev patch review
+-------------------
-Load the mainline (Linus) page here:
+Patch status
+~~~~~~~~~~~~
- https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
-
-and note the top of the "tags" section. If it is rc1, it is early in
-the dev cycle. If it was tagged rc7 a week ago, then a release is
-probably imminent. If the most recent tag is a final release tag
-(without an ``-rcN`` suffix) - we are most likely in a merge window
-and ``net-next`` is closed.
-
-How can I tell the status of a patch I've sent?
------------------------------------------------
-Start by looking at the main patchworks queue for netdev:
+Status of a patch can be checked by looking at the main patchwork
+queue for netdev:
https://patchwork.kernel.org/project/netdevbpf/list/
@@ -127,73 +122,141 @@ patch. Patches are indexed by the ``Message-ID`` header of the emails
which carried them so if you have trouble finding your patch append
the value of ``Message-ID`` to the URL above.
-How long before my patch is accepted?
--------------------------------------
-Generally speaking, the patches get triaged quickly (in less than
-48h). But be patient, if your patch is active in patchwork (i.e. it's
-listed on the project's patch list) the chances it was missed are close to zero.
-Asking the maintainer for status updates on your
-patch is a good way to ensure your patch is ignored or pushed to the
-bottom of the priority list.
+Updating patch status
+~~~~~~~~~~~~~~~~~~~~~
-Should I directly update patchwork state of my own patches?
------------------------------------------------------------
It may be tempting to help the maintainers and update the state of your
-own patches when you post a new version or spot a bug. Please do not do that.
+own patches when you post a new version or spot a bug. Please **do not**
+do that.
Interfering with the patch status on patchwork will only cause confusion. Leave
it to the maintainer to figure out what is the most recent and current
version that should be applied. If there is any doubt, the maintainer
will reply and ask what should be done.
-How do I divide my work into patches?
--------------------------------------
+Review timelines
+~~~~~~~~~~~~~~~~
-Put yourself in the shoes of the reviewer. Each patch is read separately
-and therefore should constitute a comprehensible step towards your stated
-goal.
+Generally speaking, the patches get triaged quickly (in less than
+48h). But be patient, if your patch is active in patchwork (i.e. it's
+listed on the project's patch list) the chances it was missed are close to zero.
+Asking the maintainer for status updates on your
+patch is a good way to ensure your patch is ignored or pushed to the
+bottom of the priority list.
-Avoid sending series longer than 15 patches. Larger series takes longer
-to review as reviewers will defer looking at it until they find a large
-chunk of time. A small series can be reviewed in a short time, so Maintainers
-just do it. As a result, a sequence of smaller series gets merged quicker and
-with better review coverage. Re-posting large series also increases the mailing
-list traffic.
+Partial resends
+~~~~~~~~~~~~~~~
-I made changes to only a few patches in a patch series should I resend only those changed?
-------------------------------------------------------------------------------------------
-No, please resend the entire patch series and make sure you do number your
+Please always resend the entire patch series and make sure you do number your
patches such that it is clear this is the latest and greatest set of patches
-that can be applied.
-
-I have received review feedback, when should I post a revised version of the patches?
--------------------------------------------------------------------------------------
-Allow at least 24 hours to pass between postings. This will ensure reviewers
-from all geographical locations have a chance to chime in. Do not wait
-too long (weeks) between postings either as it will make it harder for reviewers
-to recall all the context.
+that can be applied. Do not try to resend just the patches which changed.
-Make sure you address all the feedback in your new posting. Do not post a new
-version of the code if the discussion about the previous version is still
-ongoing, unless directly instructed by a reviewer.
+Handling misapplied patches
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
-I submitted multiple versions of a patch series and it looks like a version other than the last one has been accepted, what should I do?
-----------------------------------------------------------------------------------------------------------------------------------------
+Occasionally a patch series gets applied before receiving critical feedback,
+or the wrong version of a series gets applied.
There is no revert possible, once it is pushed out, it stays like that.
Please send incremental versions on top of what has been merged in order to fix
the patches the way they would look like if your latest patch series was to be
merged.
-Are there special rules regarding stable submissions on netdev?
----------------------------------------------------------------
+Stable tree
+~~~~~~~~~~~
+
While it used to be the case that netdev submissions were not supposed
to carry explicit ``CC: stable@vger.kernel.org`` tags that is no longer
the case today. Please follow the standard stable rules in
:ref:`Documentation/process/stable-kernel-rules.rst <stable_kernel_rules>`,
and make sure you include appropriate Fixes tags!
-Is the comment style convention different for the networking content?
----------------------------------------------------------------------
-Yes, in a largely trivial way. Instead of this::
+Security fixes
+~~~~~~~~~~~~~~
+
+Do not email netdev maintainers directly if you think you discovered
+a bug that might have possible security implications.
+The current netdev maintainer has consistently requested that
+people use the mailing lists and not reach out directly. If you aren't
+OK with that, then perhaps consider mailing security@kernel.org or
+reading about http://oss-security.openwall.org/wiki/mailing-lists/distros
+as possible alternative mechanisms.
+
+
+Co-posting changes to user space components
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+User space code exercising kernel features should be posted
+alongside kernel patches. This gives reviewers a chance to see
+how any new interface is used and how well it works.
+
+When user space tools reside in the kernel repo itself all changes
+should generally come as one series. If series becomes too large
+or the user space project is not reviewed on netdev include a link
+to a public repo where user space patches can be seen.
+
+In case user space tooling lives in a separate repository but is
+reviewed on netdev (e.g. patches to ``iproute2`` tools) kernel and
+user space patches should form separate series (threads) when posted
+to the mailing list, e.g.::
+
+ [PATCH net-next 0/3] net: some feature cover letter
+ └─ [PATCH net-next 1/3] net: some feature prep
+ └─ [PATCH net-next 2/3] net: some feature do it
+ └─ [PATCH net-next 3/3] selftest: net: some feature
+
+ [PATCH iproute2-next] ip: add support for some feature
+
+Posting as one thread is discouraged because it confuses patchwork
+(as of patchwork 2.2.2).
+
+Preparing changes
+-----------------
+
+Attention to detail is important. Re-read your own work as if you were the
+reviewer. You can start with using ``checkpatch.pl``, perhaps even with
+the ``--strict`` flag. But do not be mindlessly robotic in doing so.
+If your change is a bug fix, make sure your commit log indicates the
+end-user visible symptom, the underlying reason as to why it happens,
+and then if necessary, explain why the fix proposed is the best way to
+get things done. Don't mangle whitespace, and as is common, don't
+mis-indent function arguments that span multiple lines. If it is your
+first patch, mail it to yourself so you can test apply it to an
+unpatched tree to confirm infrastructure didn't mangle it.
+
+Finally, go back and read
+:ref:`Documentation/process/submitting-patches.rst <submittingpatches>`
+to be sure you are not repeating some common mistake documented there.
+
+Indicating target tree
+~~~~~~~~~~~~~~~~~~~~~~
+
+To help maintainers and CI bots you should explicitly mark which tree
+your patch is targeting. Assuming that you use git, use the prefix
+flag::
+
+ git format-patch --subject-prefix='PATCH net-next' start..finish
+
+Use ``net`` instead of ``net-next`` (always lower case) in the above for
+bug-fix ``net`` content.
+
+Dividing work into patches
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Put yourself in the shoes of the reviewer. Each patch is read separately
+and therefore should constitute a comprehensible step towards your stated
+goal.
+
+Avoid sending series longer than 15 patches. Larger series takes longer
+to review as reviewers will defer looking at it until they find a large
+chunk of time. A small series can be reviewed in a short time, so Maintainers
+just do it. As a result, a sequence of smaller series gets merged quicker and
+with better review coverage. Re-posting large series also increases the mailing
+list traffic.
+
+Multi-line comments
+~~~~~~~~~~~~~~~~~~~
+
+Comment style convention is slightly different for networking and most of
+the tree. Instead of this::
/*
* foobar blah blah blah
@@ -206,8 +269,8 @@ it is requested that you make it look like this::
* another line of text
*/
-What is "reverse xmas tree"?
-----------------------------
+Local variable ordering ("reverse xmas tree", "RCS")
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Netdev has a convention for ordering local variables in functions.
Order the variable declaration lines longest to shortest, e.g.::
@@ -219,21 +282,31 @@ Order the variable declaration lines longest to shortest, e.g.::
If there are dependencies between the variables preventing the ordering
move the initialization out of line.
-I am working in existing code which uses non-standard formatting. Which formatting should I use?
-------------------------------------------------------------------------------------------------
-Make your code follow the most recent guidelines, so that eventually all code
+Format precedence
+~~~~~~~~~~~~~~~~~
+
+When working in existing code which uses nonstandard formatting make
+your code follow the most recent guidelines, so that eventually all code
in the domain of netdev is in the preferred format.
-I found a bug that might have possible security implications or similar. Should I mail the main netdev maintainer off-list?
----------------------------------------------------------------------------------------------------------------------------
-No. The current netdev maintainer has consistently requested that
-people use the mailing lists and not reach out directly. If you aren't
-OK with that, then perhaps consider mailing security@kernel.org or
-reading about http://oss-security.openwall.org/wiki/mailing-lists/distros
-as possible alternative mechanisms.
+Resending after review
+~~~~~~~~~~~~~~~~~~~~~~
+
+Allow at least 24 hours to pass between postings. This will ensure reviewers
+from all geographical locations have a chance to chime in. Do not wait
+too long (weeks) between postings either as it will make it harder for reviewers
+to recall all the context.
+
+Make sure you address all the feedback in your new posting. Do not post a new
+version of the code if the discussion about the previous version is still
+ongoing, unless directly instructed by a reviewer.
+
+Testing
+-------
+
+Expected level of testing
+~~~~~~~~~~~~~~~~~~~~~~~~~
-What level of testing is expected before I submit my change?
-------------------------------------------------------------
At the very minimum your changes must survive an ``allyesconfig`` and an
``allmodconfig`` build with ``W=1`` set without new warnings or failures.
@@ -244,86 +317,42 @@ and the patch series contains a set of kernel selftest for
You are expected to test your changes on top of the relevant networking
tree (``net`` or ``net-next``) and not e.g. a stable tree or ``linux-next``.
-How do I post corresponding changes to user space components?
--------------------------------------------------------------
-User space code exercising kernel features should be posted
-alongside kernel patches. This gives reviewers a chance to see
-how any new interface is used and how well it works.
-
-When user space tools reside in the kernel repo itself all changes
-should generally come as one series. If series becomes too large
-or the user space project is not reviewed on netdev include a link
-to a public repo where user space patches can be seen.
-
-In case user space tooling lives in a separate repository but is
-reviewed on netdev (e.g. patches to ``iproute2`` tools) kernel and
-user space patches should form separate series (threads) when posted
-to the mailing list, e.g.::
-
- [PATCH net-next 0/3] net: some feature cover letter
- └─ [PATCH net-next 1/3] net: some feature prep
- └─ [PATCH net-next 2/3] net: some feature do it
- └─ [PATCH net-next 3/3] selftest: net: some feature
-
- [PATCH iproute2-next] ip: add support for some feature
-
-Posting as one thread is discouraged because it confuses patchwork
-(as of patchwork 2.2.2).
-
-Can I reproduce the checks from patchwork on my local machine?
---------------------------------------------------------------
+patchwork checks
+~~~~~~~~~~~~~~~~
Checks in patchwork are mostly simple wrappers around existing kernel
scripts, the sources are available at:
https://github.com/kuba-moo/nipa/tree/master/tests
-Running all the builds and checks locally is a pain, can I post my patches and have the patchwork bot validate them?
---------------------------------------------------------------------------------------------------------------------
-
-No, you must ensure that your patches are ready by testing them locally
+**Do not** post your patches just to run them through the checks.
+You must ensure that your patches are ready by testing them locally
before posting to the mailing list. The patchwork build bot instance
gets overloaded very easily and netdev@vger really doesn't need more
traffic if we can help it.
-netdevsim is great, can I extend it for my out-of-tree tests?
--------------------------------------------------------------
+netdevsim
+~~~~~~~~~
-No, ``netdevsim`` is a test vehicle solely for upstream tests.
-(Please add your tests under ``tools/testing/selftests/``.)
+``netdevsim`` is a test driver which can be used to exercise driver
+configuration APIs without requiring capable hardware.
+Mock-ups and tests based on ``netdevsim`` are strongly encouraged when
+adding new APIs, but ``netdevsim`` in itself is **not** considered
+a use case/user. You must also implement the new APIs in a real driver.
-We also give no guarantees that ``netdevsim`` won't change in the future
+We give no guarantees that ``netdevsim`` won't change in the future
in a way which would break what would normally be considered uAPI.
-Is netdevsim considered a "user" of an API?
--------------------------------------------
-
-Linux kernel has a long standing rule that no API should be added unless
-it has a real, in-tree user. Mock-ups and tests based on ``netdevsim`` are
-strongly encouraged when adding new APIs, but ``netdevsim`` in itself
-is **not** considered a use case/user.
-
-Any other tips to help ensure my net/net-next patch gets OK'd?
---------------------------------------------------------------
-Attention to detail. Re-read your own work as if you were the
-reviewer. You can start with using ``checkpatch.pl``, perhaps even with
-the ``--strict`` flag. But do not be mindlessly robotic in doing so.
-If your change is a bug fix, make sure your commit log indicates the
-end-user visible symptom, the underlying reason as to why it happens,
-and then if necessary, explain why the fix proposed is the best way to
-get things done. Don't mangle whitespace, and as is common, don't
-mis-indent function arguments that span multiple lines. If it is your
-first patch, mail it to yourself so you can test apply it to an
-unpatched tree to confirm infrastructure didn't mangle it.
-
-Finally, go back and read
-:ref:`Documentation/process/submitting-patches.rst <submittingpatches>`
-to be sure you are not repeating some common mistake documented there.
+``netdevsim`` is reserved for use by upstream tests only, so any
+new ``netdevsim`` features must be accompanied by selftests under
+``tools/testing/selftests/``.
-My company uses peer feedback in employee performance reviews. Can I ask netdev maintainers for feedback?
----------------------------------------------------------------------------------------------------------
+Testimonials / feedback
+-----------------------
-Yes, especially if you spend significant amount of time reviewing code
+Some companies use peer feedback in employee performance reviews.
+Please feel free to request feedback from netdev maintainers,
+especially if you spend significant amount of time reviewing code
and go out of your way to improve shared infrastructure.
The feedback must be requested by you, the contributor, and will always
diff --git a/Documentation/sphinx/load_config.py b/Documentation/sphinx/load_config.py
index eeb394b39e2c..8b416bfd75ac 100644
--- a/Documentation/sphinx/load_config.py
+++ b/Documentation/sphinx/load_config.py
@@ -3,7 +3,7 @@
import os
import sys
-from sphinx.util.pycompat import execfile_
+from sphinx.util.osutil import fs_encoding
# ------------------------------------------------------------------------------
def loadConfig(namespace):
@@ -48,7 +48,9 @@ def loadConfig(namespace):
sys.stdout.write("load additional sphinx-config: %s\n" % config_file)
config = namespace.copy()
config['__file__'] = config_file
- execfile_(config_file, config)
+ with open(config_file, 'rb') as f:
+ code = compile(f.read(), fs_encoding, 'exec')
+ exec(code, config)
del config['__file__']
namespace.update(config)
else:
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index deb494f759ed..0a67cb738013 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -1354,6 +1354,14 @@ the memory region are automatically reflected into the guest. For example, an
mmap() that affects the region will be made visible immediately. Another
example is madvise(MADV_DROP).
+Note: On arm64, a write generated by the page-table walker (to update
+the Access and Dirty flags, for example) never results in a
+KVM_EXIT_MMIO exit when the slot has the KVM_MEM_READONLY flag. This
+is because KVM cannot provide the data that would be written by the
+page-table walker, making it impossible to emulate the access.
+Instead, an abort (data abort if the cause of the page-table update
+was a load or a store, instruction abort if it was an instruction
+fetch) is injected in the guest.
4.36 KVM_SET_TSS_ADDR
---------------------
@@ -8062,9 +8070,13 @@ considering the state as complete. VMM needs to ensure that the dirty
state is final and avoid missing dirty pages from another ioctl ordered
after the bitmap collection.
-NOTE: One example of using the backup bitmap is saving arm64 vgic/its
-tables through KVM_DEV_ARM_{VGIC_GRP_CTRL, ITS_SAVE_TABLES} command on
-KVM device "kvm-arm-vgic-its" when dirty ring is enabled.
+NOTE: Multiple examples of using the backup bitmap: (1) save vgic/its
+tables through command KVM_DEV_ARM_{VGIC_GRP_CTRL, ITS_SAVE_TABLES} on
+KVM device "kvm-arm-vgic-its". (2) restore vgic/its tables through
+command KVM_DEV_ARM_{VGIC_GRP_CTRL, ITS_RESTORE_TABLES} on KVM device
+"kvm-arm-vgic-its". VGICv3 LPI pending status is restored. (3) save
+vgic3 pending table through KVM_DEV_ARM_VGIC_{GRP_CTRL, SAVE_PENDING_TABLES}
+command on KVM device "kvm-arm-vgic-v3".
8.30 KVM_CAP_XEN_HVM
--------------------
@@ -8310,6 +8322,20 @@ CPU[EAX=1]:ECX[24] (TSC_DEADLINE) is not reported by ``KVM_GET_SUPPORTED_CPUID``
It can be enabled if ``KVM_CAP_TSC_DEADLINE_TIMER`` is present and the kernel
has enabled in-kernel emulation of the local APIC.
+CPU topology
+~~~~~~~~~~~~
+
+Several CPUID values include topology information for the host CPU:
+0x0b and 0x1f for Intel systems, 0x8000001e for AMD systems. Different
+versions of KVM return different values for this information and userspace
+should not rely on it. Currently they return all zeroes.
+
+If userspace wishes to set up a guest topology, it should be careful that
+the values of these three leaves differ for each CPU. In particular,
+the APIC ID is found in EDX for all subleaves of 0x0b and 0x1f, and in EAX
+for 0x8000001e; the latter also encodes the core id and node id in bits
+7:0 of EBX and ECX respectively.
+
Obsolete ioctls and capabilities
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst
index a3ca76f9be75..a0146793d197 100644
--- a/Documentation/virt/kvm/locking.rst
+++ b/Documentation/virt/kvm/locking.rst
@@ -24,21 +24,22 @@ The acquisition orders for mutexes are as follows:
For SRCU:
-- ``synchronize_srcu(&kvm->srcu)`` is called _inside_
- the kvm->slots_lock critical section, therefore kvm->slots_lock
- cannot be taken inside a kvm->srcu read-side critical section.
- Instead, kvm->slots_arch_lock is released before the call
- to ``synchronize_srcu()`` and _can_ be taken inside a
- kvm->srcu read-side critical section.
-
-- kvm->lock is taken inside kvm->srcu, therefore
- ``synchronize_srcu(&kvm->srcu)`` cannot be called inside
- a kvm->lock critical section. If you cannot delay the
- call until after kvm->lock is released, use ``call_srcu``.
+- ``synchronize_srcu(&kvm->srcu)`` is called inside critical sections
+ for kvm->lock, vcpu->mutex and kvm->slots_lock. These locks _cannot_
+ be taken inside a kvm->srcu read-side critical section; that is, the
+ following is broken::
+
+ srcu_read_lock(&kvm->srcu);
+ mutex_lock(&kvm->slots_lock);
+
+- kvm->slots_arch_lock instead is released before the call to
+ ``synchronize_srcu()``. It _can_ therefore be taken inside a
+ kvm->srcu read-side critical section, for example while processing
+ a vmexit.
On x86:
-- vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock
+- vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock and kvm->arch.xen.xen_lock
- kvm->arch.mmu_lock is an rwlock. kvm->arch.tdp_mmu_pages_lock and
kvm->arch.mmu_unsync_pages_lock are taken inside kvm->arch.mmu_lock, and
diff --git a/Documentation/x86/amd-memory-encryption.rst b/Documentation/x86/amd-memory-encryption.rst
index a1940ebe7be5..934310ce7258 100644
--- a/Documentation/x86/amd-memory-encryption.rst
+++ b/Documentation/x86/amd-memory-encryption.rst
@@ -95,3 +95,39 @@ by supplying mem_encrypt=on on the kernel command line. However, if BIOS does
not enable SME, then Linux will not be able to activate memory encryption, even
if configured to do so by default or the mem_encrypt=on command line parameter
is specified.
+
+Secure Nested Paging (SNP)
+==========================
+
+SEV-SNP introduces new features (SEV_FEATURES[1:63]) which can be enabled
+by the hypervisor for security enhancements. Some of these features need
+guest side implementation to function correctly. The below table lists the
+expected guest behavior with various possible scenarios of guest/hypervisor
+SNP feature support.
+
++-----------------+---------------+---------------+------------------+
+| Feature Enabled | Guest needs | Guest has | Guest boot |
+| by the HV | implementation| implementation| behaviour |
++=================+===============+===============+==================+
+| No | No | No | Boot |
+| | | | |
++-----------------+---------------+---------------+------------------+
+| No | Yes | No | Boot |
+| | | | |
++-----------------+---------------+---------------+------------------+
+| No | Yes | Yes | Boot |
+| | | | |
++-----------------+---------------+---------------+------------------+
+| Yes | No | No | Boot with |
+| | | | feature enabled |
++-----------------+---------------+---------------+------------------+
+| Yes | Yes | No | Graceful boot |
+| | | | failure |
++-----------------+---------------+---------------+------------------+
+| Yes | Yes | Yes | Boot with |
+| | | | feature enabled |
++-----------------+---------------+---------------+------------------+
+
+More details in AMD64 APM[1] Vol 2: 15.34.10 SEV_STATUS MSR
+
+[1] https://www.amd.com/system/files/TechDocs/40332.pdf
diff --git a/MAINTAINERS b/MAINTAINERS
index 7f86d02cb427..82938ca70466 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -383,7 +383,7 @@ ACPI COMPONENT ARCHITECTURE (ACPICA)
M: Robert Moore <robert.moore@intel.com>
M: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
L: linux-acpi@vger.kernel.org
-L: devel@acpica.org
+L: acpica-devel@lists.linuxfoundation.org
S: Supported
W: https://acpica.org/
W: https://github.com/acpica/acpica/
@@ -1097,14 +1097,12 @@ S: Maintained
F: drivers/dma/ptdma/
AMD SEATTLE DEVICE TREE SUPPORT
-M: Brijesh Singh <brijeshkumar.singh@amd.com>
M: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
M: Tom Lendacky <thomas.lendacky@amd.com>
S: Supported
F: arch/arm64/boot/dts/amd/
AMD XGBE DRIVER
-M: Tom Lendacky <thomas.lendacky@amd.com>
M: "Shyam Sundar S K" <Shyam-sundar.S-k@amd.com>
L: netdev@vger.kernel.org
S: Supported
@@ -2213,6 +2211,9 @@ L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git
X: drivers/media/i2c/
+F: arch/arm64/boot/dts/freescale/
+X: arch/arm64/boot/dts/freescale/fsl-*
+X: arch/arm64/boot/dts/freescale/qoriq-*
N: imx
N: mxs
@@ -2451,11 +2452,14 @@ F: drivers/rtc/rtc-mt7622.c
ARM/Mediatek SoC support
M: Matthias Brugger <matthias.bgg@gmail.com>
+R: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
+L: linux-kernel@vger.kernel.org
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
L: linux-mediatek@lists.infradead.org (moderated for non-subscribers)
S: Maintained
W: https://mtk.wiki.kernel.org/
-C: irc://chat.freenode.net/linux-mediatek
+C: irc://irc.libera.chat/linux-mediatek
+F: arch/arm/boot/dts/mt2*
F: arch/arm/boot/dts/mt6*
F: arch/arm/boot/dts/mt7*
F: arch/arm/boot/dts/mt8*
@@ -2463,7 +2467,7 @@ F: arch/arm/mach-mediatek/
F: arch/arm64/boot/dts/mediatek/
F: drivers/soc/mediatek/
N: mtk
-N: mt[678]
+N: mt[2678]
K: mediatek
ARM/Mediatek USB3 PHY DRIVER
@@ -3767,7 +3771,6 @@ F: net/bluetooth/
BONDING DRIVER
M: Jay Vosburgh <j.vosburgh@gmail.com>
-M: Veaceslav Falico <vfalico@gmail.com>
M: Andy Gospodarek <andy@greyhouse.net>
L: netdev@vger.kernel.org
S: Supported
@@ -6948,7 +6951,7 @@ F: drivers/gpu/drm/atmel-hlcdc/
DRM DRIVERS FOR BRIDGE CHIPS
M: Andrzej Hajda <andrzej.hajda@intel.com>
M: Neil Armstrong <neil.armstrong@linaro.org>
-M: Robert Foss <robert.foss@linaro.org>
+M: Robert Foss <rfoss@kernel.org>
R: Laurent Pinchart <Laurent.pinchart@ideasonboard.com>
R: Jonas Karlman <jonas@kwiboo.se>
R: Jernej Skrabec <jernej.skrabec@gmail.com>
@@ -7616,7 +7619,6 @@ S: Maintained
F: drivers/firmware/efi/test/
EFI VARIABLE FILESYSTEM
-M: Matthew Garrett <matthew.garrett@nebula.com>
M: Jeremy Kerr <jk@ozlabs.org>
M: Ard Biesheuvel <ardb@kernel.org>
L: linux-efi@vger.kernel.org
@@ -7745,6 +7747,7 @@ R: Jeffle Xu <jefflexu@linux.alibaba.com>
L: linux-erofs@lists.ozlabs.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs.git
+F: Documentation/ABI/testing/sysfs-fs-erofs
F: Documentation/filesystems/erofs.rst
F: fs/erofs/
F: include/trace/events/erofs.h
@@ -7895,7 +7898,11 @@ F: include/linux/extcon/
EXTRA BOOT CONFIG
M: Masami Hiramatsu <mhiramat@kernel.org>
+L: linux-kernel@vger.kernel.org
+L: linux-trace-kernel@vger.kernel.org
+Q: https://patchwork.kernel.org/project/linux-trace-kernel/list/
S: Maintained
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace.git
F: Documentation/admin-guide/bootconfig.rst
F: fs/proc/bootconfig.c
F: include/linux/bootconfig.h
@@ -8196,7 +8203,7 @@ F: drivers/fpga/microchip-spi.c
FPU EMULATOR
M: Bill Metzenthen <billm@melbpc.org.au>
S: Maintained
-W: http://floatingpoint.sourceforge.net/emulator/index.html
+W: https://floatingpoint.billm.au/
F: arch/x86/math-emu/
FRAMEBUFFER CORE
@@ -8468,16 +8475,16 @@ F: fs/fscache/
F: include/linux/fscache*.h
FSCRYPT: FILE SYSTEM LEVEL ENCRYPTION SUPPORT
+M: Eric Biggers <ebiggers@kernel.org>
M: Theodore Y. Ts'o <tytso@mit.edu>
M: Jaegeuk Kim <jaegeuk@kernel.org>
-M: Eric Biggers <ebiggers@kernel.org>
L: linux-fscrypt@vger.kernel.org
S: Supported
Q: https://patchwork.kernel.org/project/linux-fscrypt/list/
-T: git git://git.kernel.org/pub/scm/fs/fscrypt/fscrypt.git
+T: git https://git.kernel.org/pub/scm/fs/fscrypt/linux.git
F: Documentation/filesystems/fscrypt.rst
F: fs/crypto/
-F: include/linux/fscrypt*.h
+F: include/linux/fscrypt.h
F: include/uapi/linux/fscrypt.h
FSI SUBSYSTEM
@@ -8520,10 +8527,10 @@ F: include/linux/fsnotify*.h
FSVERITY: READ-ONLY FILE-BASED AUTHENTICITY PROTECTION
M: Eric Biggers <ebiggers@kernel.org>
M: Theodore Y. Ts'o <tytso@mit.edu>
-L: linux-fscrypt@vger.kernel.org
+L: fsverity@lists.linux.dev
S: Supported
-Q: https://patchwork.kernel.org/project/linux-fscrypt/list/
-T: git git://git.kernel.org/pub/scm/fs/fscrypt/fscrypt.git fsverity
+Q: https://patchwork.kernel.org/project/fsverity/list/
+T: git https://git.kernel.org/pub/scm/fs/fsverity/linux.git
F: Documentation/filesystems/fsverity.rst
F: fs/verity/
F: include/linux/fsverity.h
@@ -8571,6 +8578,7 @@ F: kernel/trace/fgraph.c
F: arch/*/*/*/*ftrace*
F: arch/*/*/*ftrace*
F: include/*/ftrace.h
+F: samples/ftrace
FUNGIBLE ETHERNET DRIVERS
M: Dimitris Michailidis <dmichail@fungible.com>
@@ -9299,7 +9307,7 @@ F: net/dsa/tag_hellcreek.c
HISILICON DMA DRIVER
M: Zhou Wang <wangzhou1@hisilicon.com>
-M: Jie Hai <haijie1@hisilicon.com>
+M: Jie Hai <haijie1@huawei.com>
L: dmaengine@vger.kernel.org
S: Maintained
F: drivers/dma/hisi_dma.c
@@ -9989,7 +9997,7 @@ S: Maintained
T: git://git.kernel.org/pub/scm/linux/kernel/git/vfs/idmapping.git
F: Documentation/filesystems/idmappings.rst
F: tools/testing/selftests/mount_setattr/
-F: include/linux/mnt_idmapping.h
+F: include/linux/mnt_idmapping.*
IDT VersaClock 5 CLOCK DRIVER
M: Luca Ceresoli <luca@lucaceresoli.net>
@@ -11356,9 +11364,9 @@ F: virt/kvm/*
KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64)
M: Marc Zyngier <maz@kernel.org>
R: James Morse <james.morse@arm.com>
-R: Alexandru Elisei <alexandru.elisei@arm.com>
R: Suzuki K Poulose <suzuki.poulose@arm.com>
R: Oliver Upton <oliver.upton@linux.dev>
+R: Zenghui Yu <yuzenghui@huawei.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
L: kvmarm@lists.linux.dev
L: kvmarm@lists.cs.columbia.edu (deprecated, moderated for non-subscribers)
@@ -13620,7 +13628,7 @@ F: arch/microblaze/
MICROCHIP AT91 DMA DRIVERS
M: Ludovic Desroches <ludovic.desroches@microchip.com>
-M: Tudor Ambarus <tudor.ambarus@microchip.com>
+M: Tudor Ambarus <tudor.ambarus@linaro.org>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
L: dmaengine@vger.kernel.org
S: Supported
@@ -13665,7 +13673,7 @@ F: Documentation/devicetree/bindings/media/microchip,csi2dc.yaml
F: drivers/media/platform/microchip/microchip-csi2dc.c
MICROCHIP ECC DRIVER
-M: Tudor Ambarus <tudor.ambarus@microchip.com>
+M: Tudor Ambarus <tudor.ambarus@linaro.org>
L: linux-crypto@vger.kernel.org
S: Maintained
F: drivers/crypto/atmel-ecc.*
@@ -13762,7 +13770,7 @@ S: Maintained
F: drivers/mmc/host/atmel-mci.c
MICROCHIP NAND DRIVER
-M: Tudor Ambarus <tudor.ambarus@microchip.com>
+M: Tudor Ambarus <tudor.ambarus@linaro.org>
L: linux-mtd@lists.infradead.org
S: Supported
F: Documentation/devicetree/bindings/mtd/atmel-nand.txt
@@ -13814,7 +13822,7 @@ S: Supported
F: drivers/power/reset/at91-sama5d2_shdwc.c
MICROCHIP SPI DRIVER
-M: Tudor Ambarus <tudor.ambarus@microchip.com>
+M: Tudor Ambarus <tudor.ambarus@linaro.org>
S: Supported
F: drivers/spi/spi-atmel.*
@@ -14601,7 +14609,6 @@ F: tools/testing/selftests/net/ipsec.c
NETWORKING [IPv4/IPv6]
M: "David S. Miller" <davem@davemloft.net>
-M: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
M: David Ahern <dsahern@kernel.org>
L: netdev@vger.kernel.org
S: Maintained
@@ -14634,7 +14641,6 @@ F: net/netfilter/xt_SECMARK.c
F: net/netlabel/
NETWORKING [MPTCP]
-M: Mat Martineau <mathew.j.martineau@linux.intel.com>
M: Matthieu Baerts <matthieu.baerts@tessares.net>
L: netdev@vger.kernel.org
L: mptcp@lists.linux.dev
@@ -14919,7 +14925,8 @@ T: git://git.infradead.org/nvme.git
F: Documentation/nvme/
F: drivers/nvme/host/
F: drivers/nvme/common/
-F: include/linux/nvme*
+F: include/linux/nvme.h
+F: include/linux/nvme-*.h
F: include/uapi/linux/nvme_ioctl.h
NVM EXPRESS FABRICS AUTHENTICATION
@@ -15658,7 +15665,7 @@ OPENRISC ARCHITECTURE
M: Jonas Bonn <jonas@southpole.se>
M: Stefan Kristiansson <stefan.kristiansson@saunalahti.fi>
M: Stafford Horne <shorne@gmail.com>
-L: openrisc@lists.librecores.org
+L: linux-openrisc@vger.kernel.org
S: Maintained
W: http://openrisc.io
T: git https://github.com/openrisc/linux.git
@@ -15749,6 +15756,12 @@ S: Maintained
W: https://wireless.wiki.kernel.org/en/users/Drivers/p54
F: drivers/net/wireless/intersil/p54/
+PACKET SOCKETS
+M: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
+S: Maintained
+F: include/uapi/linux/if_packet.h
+F: net/packet/af_packet.c
+
PACKING
M: Vladimir Oltean <olteanv@gmail.com>
L: netdev@vger.kernel.org
@@ -16108,7 +16121,7 @@ F: drivers/pci/controller/pci-v3-semi.c
PCI ENDPOINT SUBSYSTEM
M: Lorenzo Pieralisi <lpieralisi@kernel.org>
-R: Krzysztof Wilczyński <kw@linux.com>
+M: Krzysztof Wilczyński <kw@linux.com>
R: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
R: Kishon Vijay Abraham I <kishon@kernel.org>
L: linux-pci@vger.kernel.org
@@ -16116,7 +16129,7 @@ S: Supported
Q: https://patchwork.kernel.org/project/linux-pci/list/
B: https://bugzilla.kernel.org
C: irc://irc.oftc.net/linux-pci
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/lpieralisi/pci.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci.git
F: Documentation/PCI/endpoint/*
F: Documentation/misc-devices/pci-endpoint-test.rst
F: drivers/misc/pci_endpoint_test.c
@@ -16151,7 +16164,7 @@ S: Supported
Q: https://patchwork.kernel.org/project/linux-pci/list/
B: https://bugzilla.kernel.org
C: irc://irc.oftc.net/linux-pci
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci.git
F: Documentation/driver-api/pci/p2pdma.rst
F: drivers/pci/p2pdma.c
F: include/linux/pci-p2pdma.h
@@ -16173,14 +16186,14 @@ F: drivers/pci/controller/pci-xgene-msi.c
PCI NATIVE HOST BRIDGE AND ENDPOINT DRIVERS
M: Lorenzo Pieralisi <lpieralisi@kernel.org>
+M: Krzysztof Wilczyński <kw@linux.com>
R: Rob Herring <robh@kernel.org>
-R: Krzysztof Wilczyński <kw@linux.com>
L: linux-pci@vger.kernel.org
S: Supported
Q: https://patchwork.kernel.org/project/linux-pci/list/
B: https://bugzilla.kernel.org
C: irc://irc.oftc.net/linux-pci
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/lpieralisi/pci.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci.git
F: Documentation/devicetree/bindings/pci/
F: drivers/pci/controller/
F: drivers/pci/pci-bridge-emul.c
@@ -16193,7 +16206,7 @@ S: Supported
Q: https://patchwork.kernel.org/project/linux-pci/list/
B: https://bugzilla.kernel.org
C: irc://irc.oftc.net/linux-pci
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci.git
F: Documentation/PCI/
F: Documentation/devicetree/bindings/pci/
F: arch/x86/kernel/early-quirks.c
@@ -16610,6 +16623,13 @@ S: Supported
F: Documentation/devicetree/bindings/input/pine64,pinephone-keyboard.yaml
F: drivers/input/keyboard/pinephone-keyboard.c
+PKTCDVD DRIVER
+M: linux-block@vger.kernel.org
+S: Orphan
+F: drivers/block/pktcdvd.c
+F: include/linux/pktcdvd.h
+F: include/uapi/linux/pktcdvd.h
+
PLANTOWER PMS7003 AIR POLLUTION SENSOR DRIVER
M: Tomasz Duszynski <tduszyns@gmail.com>
S: Maintained
@@ -17230,7 +17250,7 @@ F: Documentation/devicetree/bindings/net/qcom,bam-dmux.yaml
F: drivers/net/wwan/qcom_bam_dmux.c
QUALCOMM CAMERA SUBSYSTEM DRIVER
-M: Robert Foss <robert.foss@linaro.org>
+M: Robert Foss <rfoss@kernel.org>
M: Todor Tomov <todor.too@gmail.com>
L: linux-media@vger.kernel.org
S: Maintained
@@ -17310,7 +17330,7 @@ F: drivers/dma/qcom/hidma*
QUALCOMM I2C CCI DRIVER
M: Loic Poulain <loic.poulain@linaro.org>
-M: Robert Foss <robert.foss@linaro.org>
+M: Robert Foss <rfoss@kernel.org>
L: linux-i2c@vger.kernel.org
L: linux-arm-msm@vger.kernel.org
S: Maintained
@@ -17949,6 +17969,7 @@ M: Albert Ou <aou@eecs.berkeley.edu>
L: linux-riscv@lists.infradead.org
S: Supported
Q: https://patchwork.kernel.org/project/linux-riscv/list/
+C: irc://irc.libera.chat/riscv
P: Documentation/riscv/patch-acceptance.rst
T: git git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux.git
F: arch/riscv/
@@ -18214,6 +18235,7 @@ L: rust-for-linux@vger.kernel.org
S: Supported
W: https://github.com/Rust-for-Linux/linux
B: https://github.com/Rust-for-Linux/linux/issues
+C: zulip://rust-for-linux.zulipchat.com
T: git https://github.com/Rust-for-Linux/linux.git rust-next
F: Documentation/rust/
F: rust/
@@ -18670,9 +18692,9 @@ F: drivers/target/
F: include/target/
SCTP PROTOCOL
-M: Vlad Yasevich <vyasevich@gmail.com>
M: Neil Horman <nhorman@tuxdriver.com>
M: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+M: Xin Long <lucien.xin@gmail.com>
L: linux-sctp@vger.kernel.org
S: Maintained
W: http://lksctp.sourceforge.net
@@ -19318,6 +19340,13 @@ L: alsa-devel@alsa-project.org (moderated for non-subscribers)
S: Orphan
F: sound/soc/uniphier/
+SOCKET TIMESTAMPING
+M: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
+S: Maintained
+F: Documentation/networking/timestamping.rst
+F: include/uapi/linux/net_tstamp.h
+F: tools/testing/selftests/net/so_txtime.c
+
SOEKRIS NET48XX LED SUPPORT
M: Chris Boot <bootc@bootc.net>
S: Maintained
@@ -19665,7 +19694,7 @@ F: drivers/clk/spear/
F: drivers/pinctrl/spear/
SPI NOR SUBSYSTEM
-M: Tudor Ambarus <tudor.ambarus@microchip.com>
+M: Tudor Ambarus <tudor.ambarus@linaro.org>
M: Pratyush Yadav <pratyush@kernel.org>
R: Michael Walle <michael@walle.cc>
L: linux-mtd@lists.infradead.org
@@ -20063,6 +20092,7 @@ F: drivers/watchdog/sunplus_wdt.c
SUPERH
M: Yoshinori Sato <ysato@users.sourceforge.jp>
M: Rich Felker <dalias@libc.org>
+M: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
L: linux-sh@vger.kernel.org
S: Maintained
Q: http://patchwork.kernel.org/project/linux-sh/list/
@@ -20295,8 +20325,7 @@ S: Maintained
F: drivers/platform/x86/system76_acpi.c
SYSV FILESYSTEM
-M: Christoph Hellwig <hch@infradead.org>
-S: Maintained
+S: Orphan
F: Documentation/filesystems/sysv-fs.rst
F: fs/sysv/
F: include/linux/sysv_fs.h
@@ -21703,6 +21732,7 @@ F: include/uapi/linux/uvcvideo.h
USB WEBCAM GADGET
M: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+M: Daniel Scally <dan.scally@ideasonboard.com>
L: linux-usb@vger.kernel.org
S: Maintained
F: drivers/usb/gadget/function/*uvc*
@@ -21738,6 +21768,13 @@ T: git git://linuxtv.org/media_tree.git
F: Documentation/admin-guide/media/zr364xx*
F: drivers/staging/media/deprecated/zr364xx/
+USER DATAGRAM PROTOCOL (UDP)
+M: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
+S: Maintained
+F: include/linux/udp.h
+F: net/ipv4/udp.c
+F: net/ipv6/udp.c
+
USER-MODE LINUX (UML)
M: Richard Weinberger <richard@nod.at>
M: Anton Ivanov <anton.ivanov@cambridgegreys.com>
@@ -21783,11 +21820,9 @@ W: http://en.wikipedia.org/wiki/Util-linux
T: git git://git.kernel.org/pub/scm/utils/util-linux/util-linux.git
UUID HELPERS
-M: Christoph Hellwig <hch@lst.de>
R: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
L: linux-kernel@vger.kernel.org
S: Maintained
-T: git git://git.infradead.org/users/hch/uuid.git
F: include/linux/uuid.h
F: include/uapi/linux/uuid.h
F: lib/test_uuid.c
@@ -22246,7 +22281,9 @@ F: drivers/scsi/vmw_pvscsi.c
F: drivers/scsi/vmw_pvscsi.h
VMWARE VIRTUAL PTP CLOCK DRIVER
-M: Vivek Thampi <vithampi@vmware.com>
+M: Srivatsa S. Bhat (VMware) <srivatsa@csail.mit.edu>
+M: Deep Shah <sdeep@vmware.com>
+R: Alexey Makhalov <amakhalov@vmware.com>
R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
L: netdev@vger.kernel.org
S: Supported
diff --git a/Makefile b/Makefile
index c05b4fb7121e..8e5621ddadb2 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 6
PATCHLEVEL = 2
SUBLEVEL = 0
-EXTRAVERSION = -rc2
+EXTRAVERSION =
NAME = Hurr durr I'ma ninja sloth
# *DOCUMENTATION*
@@ -549,7 +549,7 @@ LDFLAGS_MODULE =
CFLAGS_KERNEL =
RUSTFLAGS_KERNEL =
AFLAGS_KERNEL =
-export LDFLAGS_vmlinux =
+LDFLAGS_vmlinux =
# Use USERINCLUDE when you must reference the UAPI directories only.
USERINCLUDE := \
@@ -1248,6 +1248,18 @@ vmlinux.o modules.builtin.modinfo modules.builtin: vmlinux_o
@:
PHONY += vmlinux
+# LDFLAGS_vmlinux in the top Makefile defines linker flags for the top vmlinux,
+# not for decompressors. LDFLAGS_vmlinux in arch/*/boot/compressed/Makefile is
+# unrelated; the decompressors just happen to have the same base name,
+# arch/*/boot/compressed/vmlinux.
+# Export LDFLAGS_vmlinux only to scripts/Makefile.vmlinux.
+#
+# _LDFLAGS_vmlinux is a workaround for the 'private export' bug:
+# https://savannah.gnu.org/bugs/?61463
+# For Make > 4.4, the following simple code will work:
+# vmlinux: private export LDFLAGS_vmlinux := $(LDFLAGS_vmlinux)
+vmlinux: private _LDFLAGS_vmlinux := $(LDFLAGS_vmlinux)
+vmlinux: export LDFLAGS_vmlinux = $(_LDFLAGS_vmlinux)
vmlinux: vmlinux.o $(KBUILD_LDS) modpost
$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.vmlinux
@@ -1533,6 +1545,7 @@ endif
# *.ko are usually independent of vmlinux, but CONFIG_DEBUG_INFOBTF_MODULES
# is an exception.
ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+KBUILD_BUILTIN := 1
modules: vmlinux
endif
@@ -1589,7 +1602,7 @@ endif # CONFIG_MODULES
CLEAN_FILES += include/ksym vmlinux.symvers modules-only.symvers \
modules.builtin modules.builtin.modinfo modules.nsdeps \
compile_commands.json .thinlto-cache rust/test rust/doc \
- .vmlinux.objs .vmlinux.export.c
+ rust-project.json .vmlinux.objs .vmlinux.export.c
# Directories & files removed with 'make mrproper'
MRPROPER_FILES += include/config include/generated \
@@ -1986,7 +1999,7 @@ $(single-no-ko): $(build-dir)
# Remove MODORDER when done because it is not the real one.
PHONY += single_modules
single_modules: $(single-no-ko) modules_prepare
- $(Q){ $(foreach m, $(single-ko), echo $(extmod_prefix)$m;) } > $(MODORDER)
+ $(Q){ $(foreach m, $(single-ko), echo $(extmod_prefix)$(m:%.ko=%.o);) } > $(MODORDER)
$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost
ifneq ($(KBUILD_MODPOST_NOFINAL),1)
$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modfinal
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 4067f5169144..955b0362cdfb 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -132,7 +132,7 @@ AFLAGS_NOWARN :=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W)
ifeq ($(CONFIG_THUMB2_KERNEL),y)
CFLAGS_ISA :=-Wa,-mimplicit-it=always $(AFLAGS_NOWARN)
-AFLAGS_ISA :=$(CFLAGS_ISA) -Wa$(comma)-mthumb -D__thumb2__=2
+AFLAGS_ISA :=$(CFLAGS_ISA) -Wa$(comma)-mthumb
CFLAGS_ISA +=-mthumb
else
CFLAGS_ISA :=$(call cc-option,-marm,) $(AFLAGS_NOWARN)
diff --git a/arch/arm/boot/dts/armada-38x.dtsi b/arch/arm/boot/dts/armada-38x.dtsi
index 12933eff419f..446861b6b17b 100644
--- a/arch/arm/boot/dts/armada-38x.dtsi
+++ b/arch/arm/boot/dts/armada-38x.dtsi
@@ -304,7 +304,7 @@
};
gpio0: gpio@18100 {
- compatible = "marvell,armadaxp-gpio",
+ compatible = "marvell,armada-370-gpio",
"marvell,orion-gpio";
reg = <0x18100 0x40>, <0x181c0 0x08>;
reg-names = "gpio", "pwm";
@@ -323,7 +323,7 @@
};
gpio1: gpio@18140 {
- compatible = "marvell,armadaxp-gpio",
+ compatible = "marvell,armada-370-gpio",
"marvell,orion-gpio";
reg = <0x18140 0x40>, <0x181c8 0x08>;
reg-names = "gpio", "pwm";
diff --git a/arch/arm/boot/dts/armada-39x.dtsi b/arch/arm/boot/dts/armada-39x.dtsi
index 1e05208d9f34..9d1cac49c022 100644
--- a/arch/arm/boot/dts/armada-39x.dtsi
+++ b/arch/arm/boot/dts/armada-39x.dtsi
@@ -213,7 +213,7 @@
};
gpio0: gpio@18100 {
- compatible = "marvell,armadaxp-gpio", "marvell,orion-gpio";
+ compatible = "marvell,orion-gpio";
reg = <0x18100 0x40>;
ngpios = <32>;
gpio-controller;
@@ -227,7 +227,7 @@
};
gpio1: gpio@18140 {
- compatible = "marvell,armadaxp-gpio", "marvell,orion-gpio";
+ compatible = "marvell,orion-gpio";
reg = <0x18140 0x40>;
ngpios = <28>;
gpio-controller;
diff --git a/arch/arm/boot/dts/aspeed-bmc-ibm-bonnell.dts b/arch/arm/boot/dts/aspeed-bmc-ibm-bonnell.dts
index d1971ddf06a5..7f755e5a4624 100644
--- a/arch/arm/boot/dts/aspeed-bmc-ibm-bonnell.dts
+++ b/arch/arm/boot/dts/aspeed-bmc-ibm-bonnell.dts
@@ -751,7 +751,7 @@
};
pca9849@75 {
- compatible = "nxp,pca849";
+ compatible = "nxp,pca9849";
reg = <0x75>;
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm/boot/dts/imx53-ppd.dts b/arch/arm/boot/dts/imx53-ppd.dts
index 37d0cffea99c..70c4a4852256 100644
--- a/arch/arm/boot/dts/imx53-ppd.dts
+++ b/arch/arm/boot/dts/imx53-ppd.dts
@@ -488,7 +488,7 @@
scl-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>;
status = "okay";
- i2c-switch@70 {
+ i2c-mux@70 {
compatible = "nxp,pca9547";
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm/boot/dts/imx6qdl-gw560x.dtsi b/arch/arm/boot/dts/imx6qdl-gw560x.dtsi
index 4bc4371e6bae..4b81a975c979 100644
--- a/arch/arm/boot/dts/imx6qdl-gw560x.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-gw560x.dtsi
@@ -632,7 +632,6 @@
&uart1 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_uart1>;
- uart-has-rtscts;
rts-gpios = <&gpio7 1 GPIO_ACTIVE_HIGH>;
status = "okay";
};
diff --git a/arch/arm/boot/dts/imx6ul-pico-dwarf.dts b/arch/arm/boot/dts/imx6ul-pico-dwarf.dts
index 162dc259edc8..5a74c7f68eb6 100644
--- a/arch/arm/boot/dts/imx6ul-pico-dwarf.dts
+++ b/arch/arm/boot/dts/imx6ul-pico-dwarf.dts
@@ -32,7 +32,7 @@
};
&i2c2 {
- clock_frequency = <100000>;
+ clock-frequency = <100000>;
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_i2c2>;
status = "okay";
diff --git a/arch/arm/boot/dts/imx7d-pico-dwarf.dts b/arch/arm/boot/dts/imx7d-pico-dwarf.dts
index 5162fe227d1e..fdc10563f147 100644
--- a/arch/arm/boot/dts/imx7d-pico-dwarf.dts
+++ b/arch/arm/boot/dts/imx7d-pico-dwarf.dts
@@ -32,7 +32,7 @@
};
&i2c1 {
- clock_frequency = <100000>;
+ clock-frequency = <100000>;
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_i2c1>;
status = "okay";
@@ -52,7 +52,7 @@
};
&i2c4 {
- clock_frequency = <100000>;
+ clock-frequency = <100000>;
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_i2c1>;
status = "okay";
diff --git a/arch/arm/boot/dts/imx7d-pico-nymph.dts b/arch/arm/boot/dts/imx7d-pico-nymph.dts
index 104a85254adb..5afb1674e012 100644
--- a/arch/arm/boot/dts/imx7d-pico-nymph.dts
+++ b/arch/arm/boot/dts/imx7d-pico-nymph.dts
@@ -43,7 +43,7 @@
};
&i2c1 {
- clock_frequency = <100000>;
+ clock-frequency = <100000>;
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_i2c1>;
status = "okay";
@@ -64,7 +64,7 @@
};
&i2c2 {
- clock_frequency = <100000>;
+ clock-frequency = <100000>;
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_i2c2>;
status = "okay";
diff --git a/arch/arm/boot/dts/imx7d-smegw01.dts b/arch/arm/boot/dts/imx7d-smegw01.dts
index 546268b8d0b1..c0f00f5db11e 100644
--- a/arch/arm/boot/dts/imx7d-smegw01.dts
+++ b/arch/arm/boot/dts/imx7d-smegw01.dts
@@ -198,6 +198,7 @@
&usbotg2 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_usbotg2>;
+ over-current-active-low;
dr_mode = "host";
status = "okay";
};
@@ -374,7 +375,7 @@
pinctrl_usbotg2: usbotg2grp {
fsl,pins = <
- MX7D_PAD_UART3_RTS_B__USB_OTG2_OC 0x04
+ MX7D_PAD_UART3_RTS_B__USB_OTG2_OC 0x5c
>;
};
diff --git a/arch/arm/boot/dts/nuvoton-wpcm450.dtsi b/arch/arm/boot/dts/nuvoton-wpcm450.dtsi
index b637241316bb..fd671c7a1e5d 100644
--- a/arch/arm/boot/dts/nuvoton-wpcm450.dtsi
+++ b/arch/arm/boot/dts/nuvoton-wpcm450.dtsi
@@ -480,6 +480,7 @@
reg = <0xc8000000 0x1000>, <0xc0000000 0x4000000>;
reg-names = "control", "memory";
clocks = <&clk 0>;
+ nuvoton,shm = <&shm>;
status = "disabled";
};
diff --git a/arch/arm/boot/dts/qcom-apq8084-ifc6540.dts b/arch/arm/boot/dts/qcom-apq8084-ifc6540.dts
index 44cd72f1b1be..116e59a3b76d 100644
--- a/arch/arm/boot/dts/qcom-apq8084-ifc6540.dts
+++ b/arch/arm/boot/dts/qcom-apq8084-ifc6540.dts
@@ -19,16 +19,16 @@
serial@f995e000 {
status = "okay";
};
+ };
+};
- sdhci@f9824900 {
- bus-width = <8>;
- non-removable;
- status = "okay";
- };
+&sdhc_1 {
+ bus-width = <8>;
+ non-removable;
+ status = "okay";
+};
- sdhci@f98a4900 {
- cd-gpios = <&tlmm 122 GPIO_ACTIVE_LOW>;
- bus-width = <4>;
- };
- };
+&sdhc_2 {
+ cd-gpios = <&tlmm 122 GPIO_ACTIVE_LOW>;
+ bus-width = <4>;
};
diff --git a/arch/arm/boot/dts/qcom-apq8084.dtsi b/arch/arm/boot/dts/qcom-apq8084.dtsi
index fe30abfff90a..4b0d2b4f4b6a 100644
--- a/arch/arm/boot/dts/qcom-apq8084.dtsi
+++ b/arch/arm/boot/dts/qcom-apq8084.dtsi
@@ -421,7 +421,7 @@
status = "disabled";
};
- mmc@f9824900 {
+ sdhc_1: mmc@f9824900 {
compatible = "qcom,apq8084-sdhci", "qcom,sdhci-msm-v4";
reg = <0xf9824900 0x11c>, <0xf9824000 0x800>;
reg-names = "hc", "core";
@@ -434,7 +434,7 @@
status = "disabled";
};
- mmc@f98a4900 {
+ sdhc_2: mmc@f98a4900 {
compatible = "qcom,apq8084-sdhci", "qcom,sdhci-msm-v4";
reg = <0xf98a4900 0x11c>, <0xf98a4000 0x800>;
reg-names = "hc", "core";
diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi
index 487b0e03d4b4..2ca76b69add7 100644
--- a/arch/arm/boot/dts/rk3288.dtsi
+++ b/arch/arm/boot/dts/rk3288.dtsi
@@ -1181,6 +1181,7 @@
clock-names = "dp", "pclk";
phys = <&edp_phy>;
phy-names = "dp";
+ power-domains = <&power RK3288_PD_VIO>;
resets = <&cru SRST_EDP>;
reset-names = "dp";
rockchip,grf = <&grf>;
diff --git a/arch/arm/boot/dts/sam9x60.dtsi b/arch/arm/boot/dts/sam9x60.dtsi
index 8f5477e307dd..37a5d96aaf64 100644
--- a/arch/arm/boot/dts/sam9x60.dtsi
+++ b/arch/arm/boot/dts/sam9x60.dtsi
@@ -564,7 +564,7 @@
mpddrc: mpddrc@ffffe800 {
compatible = "microchip,sam9x60-ddramc", "atmel,sama5d3-ddramc";
reg = <0xffffe800 0x200>;
- clocks = <&pmc PMC_TYPE_SYSTEM 2>, <&pmc PMC_TYPE_CORE PMC_MCK>;
+ clocks = <&pmc PMC_TYPE_SYSTEM 2>, <&pmc PMC_TYPE_PERIPHERAL 49>;
clock-names = "ddrck", "mpddr";
};
diff --git a/arch/arm/boot/dts/stihxxx-b2120.dtsi b/arch/arm/boot/dts/stihxxx-b2120.dtsi
index 920a0bad7494..8d9a2dfa76f1 100644
--- a/arch/arm/boot/dts/stihxxx-b2120.dtsi
+++ b/arch/arm/boot/dts/stihxxx-b2120.dtsi
@@ -178,7 +178,7 @@
tsin-num = <0>;
serial-not-parallel;
i2c-bus = <&ssc2>;
- reset-gpios = <&pio15 4 GPIO_ACTIVE_HIGH>;
+ reset-gpios = <&pio15 4 GPIO_ACTIVE_LOW>;
dvb-card = <STV0367_TDA18212_NIMA_1>;
};
};
diff --git a/arch/arm/boot/dts/stm32mp151a-prtt1l.dtsi b/arch/arm/boot/dts/stm32mp151a-prtt1l.dtsi
index d865ab5d866b..dd23de85100c 100644
--- a/arch/arm/boot/dts/stm32mp151a-prtt1l.dtsi
+++ b/arch/arm/boot/dts/stm32mp151a-prtt1l.dtsi
@@ -101,8 +101,12 @@
&qspi {
pinctrl-names = "default", "sleep";
- pinctrl-0 = <&qspi_clk_pins_a &qspi_bk1_pins_a>;
- pinctrl-1 = <&qspi_clk_sleep_pins_a &qspi_bk1_sleep_pins_a>;
+ pinctrl-0 = <&qspi_clk_pins_a
+ &qspi_bk1_pins_a
+ &qspi_cs1_pins_a>;
+ pinctrl-1 = <&qspi_clk_sleep_pins_a
+ &qspi_bk1_sleep_pins_a
+ &qspi_cs1_sleep_pins_a>;
reg = <0x58003000 0x1000>, <0x70000000 0x4000000>;
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm/boot/dts/stm32mp157c-emstamp-argon.dtsi b/arch/arm/boot/dts/stm32mp157c-emstamp-argon.dtsi
index aef02e6421a3..7d11c50b9e40 100644
--- a/arch/arm/boot/dts/stm32mp157c-emstamp-argon.dtsi
+++ b/arch/arm/boot/dts/stm32mp157c-emstamp-argon.dtsi
@@ -391,8 +391,12 @@
&qspi {
pinctrl-names = "default", "sleep";
- pinctrl-0 = <&qspi_clk_pins_a &qspi_bk1_pins_a>;
- pinctrl-1 = <&qspi_clk_sleep_pins_a &qspi_bk1_sleep_pins_a>;
+ pinctrl-0 = <&qspi_clk_pins_a
+ &qspi_bk1_pins_a
+ &qspi_cs1_pins_a>;
+ pinctrl-1 = <&qspi_clk_sleep_pins_a
+ &qspi_bk1_sleep_pins_a
+ &qspi_cs1_sleep_pins_a>;
reg = <0x58003000 0x1000>, <0x70000000 0x4000000>;
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi
index 002f221f1694..c06edd2eacb0 100644
--- a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi
+++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi
@@ -428,8 +428,12 @@
&qspi {
pinctrl-names = "default", "sleep";
- pinctrl-0 = <&qspi_clk_pins_a &qspi_bk1_pins_a>;
- pinctrl-1 = <&qspi_clk_sleep_pins_a &qspi_bk1_sleep_pins_a>;
+ pinctrl-0 = <&qspi_clk_pins_a
+ &qspi_bk1_pins_a
+ &qspi_cs1_pins_a>;
+ pinctrl-1 = <&qspi_clk_sleep_pins_a
+ &qspi_bk1_sleep_pins_a
+ &qspi_cs1_sleep_pins_a>;
reg = <0x58003000 0x1000>, <0x70000000 0x4000000>;
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi
index 134a798ad3f2..bb40fb46da81 100644
--- a/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi
+++ b/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi
@@ -247,8 +247,12 @@
&qspi {
pinctrl-names = "default", "sleep";
- pinctrl-0 = <&qspi_clk_pins_a &qspi_bk1_pins_a>;
- pinctrl-1 = <&qspi_clk_sleep_pins_a &qspi_bk1_sleep_pins_a>;
+ pinctrl-0 = <&qspi_clk_pins_a
+ &qspi_bk1_pins_a
+ &qspi_cs1_pins_a>;
+ pinctrl-1 = <&qspi_clk_sleep_pins_a
+ &qspi_bk1_sleep_pins_a
+ &qspi_cs1_sleep_pins_a>;
reg = <0x58003000 0x1000>, <0x70000000 0x200000>;
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts b/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts
index 42ed4a04a12e..6280c5e86a12 100644
--- a/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts
+++ b/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts
@@ -345,7 +345,7 @@
};
&i2c2 {
- tca9548@70 {
+ i2c-mux@70 {
compatible = "nxp,pca9548";
pinctrl-0 = <&pinctrl_i2c_mux_reset>;
pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts b/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts
index f892977da9e4..c00d39562a10 100644
--- a/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts
+++ b/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts
@@ -340,7 +340,7 @@
};
&i2c2 {
- tca9548@70 {
+ i2c-mux@70 {
compatible = "nxp,pca9548";
pinctrl-0 = <&pinctrl_i2c_mux_reset>;
pinctrl-names = "default";
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 971e74546fb1..13e62c7c25dc 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -53,7 +53,12 @@ $(obj)/%-core.S: $(src)/%-armv4.pl
clean-files += poly1305-core.S sha256-core.S sha512-core.S
+aflags-thumb2-$(CONFIG_THUMB2_KERNEL) := -U__thumb2__ -D__thumb2__=1
+
+AFLAGS_sha256-core.o += $(aflags-thumb2-y)
+AFLAGS_sha512-core.o += $(aflags-thumb2-y)
+
# massage the perlasm code a bit so we only get the NEON routine if we need it
poly1305-aflags-$(CONFIG_CPU_V7) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=5
poly1305-aflags-$(CONFIG_KERNEL_MODE_NEON) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=7
-AFLAGS_poly1305-core.o += $(poly1305-aflags-y)
+AFLAGS_poly1305-core.o += $(poly1305-aflags-y) $(aflags-thumb2-y)
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index aecc403b2880..7f092cb55a41 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -128,15 +128,16 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
#define TIF_NEED_RESCHED 1 /* rescheduling necessary */
#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
#define TIF_UPROBE 3 /* breakpointed or singlestepping */
-#define TIF_SYSCALL_TRACE 4 /* syscall trace active */
-#define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */
-#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
-#define TIF_SECCOMP 7 /* seccomp syscall filtering active */
-#define TIF_NOTIFY_SIGNAL 8 /* signal notifications exist */
+#define TIF_NOTIFY_SIGNAL 4 /* signal notifications exist */
#define TIF_USING_IWMMXT 17
#define TIF_MEMDIE 18 /* is terminating due to OOM killer */
-#define TIF_RESTORE_SIGMASK 20
+#define TIF_RESTORE_SIGMASK 19
+#define TIF_SYSCALL_TRACE 20 /* syscall trace active */
+#define TIF_SYSCALL_AUDIT 21 /* syscall auditing active */
+#define TIF_SYSCALL_TRACEPOINT 22 /* syscall tracepoint instrumentation */
+#define TIF_SECCOMP 23 /* seccomp syscall filtering active */
+
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c
index 68112c172025..006163195d67 100644
--- a/arch/arm/kernel/sys_oabi-compat.c
+++ b/arch/arm/kernel/sys_oabi-compat.c
@@ -73,6 +73,7 @@
#include <linux/syscalls.h>
#include <linux/errno.h>
#include <linux/fs.h>
+#include <linux/filelock.h>
#include <linux/cred.h>
#include <linux/fcntl.h>
#include <linux/eventpoll.h>
diff --git a/arch/arm/mach-footbridge/isa-rtc.c b/arch/arm/mach-footbridge/isa-rtc.c
index b8f741a3a37e..237b828dd2f1 100644
--- a/arch/arm/mach-footbridge/isa-rtc.c
+++ b/arch/arm/mach-footbridge/isa-rtc.c
@@ -20,7 +20,6 @@
#include <linux/init.h>
#include <linux/mc146818rtc.h>
-#include <linux/bcd.h>
#include <linux/io.h>
#include "common.h"
diff --git a/arch/arm/mach-imx/cpu-imx25.c b/arch/arm/mach-imx/cpu-imx25.c
index 3e63445cde06..cc86977d0a34 100644
--- a/arch/arm/mach-imx/cpu-imx25.c
+++ b/arch/arm/mach-imx/cpu-imx25.c
@@ -23,6 +23,7 @@ static int mx25_read_cpu_rev(void)
np = of_find_compatible_node(NULL, NULL, "fsl,imx25-iim");
iim_base = of_iomap(np, 0);
+ of_node_put(np);
BUG_ON(!iim_base);
rev = readl(iim_base + MXC_IIMSREV);
iounmap(iim_base);
diff --git a/arch/arm/mach-imx/cpu-imx27.c b/arch/arm/mach-imx/cpu-imx27.c
index bf70e13bbe9e..1d2893908368 100644
--- a/arch/arm/mach-imx/cpu-imx27.c
+++ b/arch/arm/mach-imx/cpu-imx27.c
@@ -28,6 +28,7 @@ static int mx27_read_cpu_rev(void)
np = of_find_compatible_node(NULL, NULL, "fsl,imx27-ccm");
ccm_base = of_iomap(np, 0);
+ of_node_put(np);
BUG_ON(!ccm_base);
/*
* now we have access to the IO registers. As we need
diff --git a/arch/arm/mach-imx/cpu-imx31.c b/arch/arm/mach-imx/cpu-imx31.c
index b9c24b851d1a..35c544924e50 100644
--- a/arch/arm/mach-imx/cpu-imx31.c
+++ b/arch/arm/mach-imx/cpu-imx31.c
@@ -39,6 +39,7 @@ static int mx31_read_cpu_rev(void)
np = of_find_compatible_node(NULL, NULL, "fsl,imx31-iim");
iim_base = of_iomap(np, 0);
+ of_node_put(np);
BUG_ON(!iim_base);
/* read SREV register from IIM module */
diff --git a/arch/arm/mach-imx/cpu-imx35.c b/arch/arm/mach-imx/cpu-imx35.c
index 80e7d8ab9f1b..1fe75b39c2d9 100644
--- a/arch/arm/mach-imx/cpu-imx35.c
+++ b/arch/arm/mach-imx/cpu-imx35.c
@@ -21,6 +21,7 @@ static int mx35_read_cpu_rev(void)
np = of_find_compatible_node(NULL, NULL, "fsl,imx35-iim");
iim_base = of_iomap(np, 0);
+ of_node_put(np);
BUG_ON(!iim_base);
rev = imx_readl(iim_base + MXC_IIMSREV);
diff --git a/arch/arm/mach-imx/cpu-imx5.c b/arch/arm/mach-imx/cpu-imx5.c
index ad56263778f9..a67c89bf155d 100644
--- a/arch/arm/mach-imx/cpu-imx5.c
+++ b/arch/arm/mach-imx/cpu-imx5.c
@@ -28,6 +28,7 @@ static u32 imx5_read_srev_reg(const char *compat)
np = of_find_compatible_node(NULL, NULL, compat);
iim_base = of_iomap(np, 0);
+ of_node_put(np);
WARN_ON(!iim_base);
srev = readl(iim_base + IIM_SREV) & 0xff;
diff --git a/arch/arm/mach-omap1/Kconfig b/arch/arm/mach-omap1/Kconfig
index 538a960257cc..7ec7ada287e0 100644
--- a/arch/arm/mach-omap1/Kconfig
+++ b/arch/arm/mach-omap1/Kconfig
@@ -4,6 +4,7 @@ menuconfig ARCH_OMAP1
depends on ARCH_MULTI_V4T || ARCH_MULTI_V5
depends on CPU_LITTLE_ENDIAN
depends on ATAGS
+ select ARCH_OMAP
select ARCH_HAS_HOLES_MEMORYMODEL
select ARCH_OMAP
select CLKSRC_MMIO
@@ -45,10 +46,6 @@ config ARCH_OMAP16XX
select CPU_ARM926T
select OMAP_DM_TIMER
-config ARCH_OMAP1_ANY
- select ARCH_OMAP
- def_bool ARCH_OMAP730 || ARCH_OMAP850 || ARCH_OMAP15XX || ARCH_OMAP16XX
-
config ARCH_OMAP
bool
diff --git a/arch/arm/mach-omap1/Makefile b/arch/arm/mach-omap1/Makefile
index 506074b86333..0615cb0ba580 100644
--- a/arch/arm/mach-omap1/Makefile
+++ b/arch/arm/mach-omap1/Makefile
@@ -3,8 +3,6 @@
# Makefile for the linux kernel.
#
-ifdef CONFIG_ARCH_OMAP1_ANY
-
# Common support
obj-y := io.o id.o sram-init.o sram.o time.o irq.o mux.o flash.o \
serial.o devices.o dma.o omap-dma.o fb.o
@@ -59,5 +57,3 @@ obj-$(CONFIG_ARCH_OMAP730) += gpio7xx.o
obj-$(CONFIG_ARCH_OMAP850) += gpio7xx.o
obj-$(CONFIG_ARCH_OMAP15XX) += gpio15xx.o
obj-$(CONFIG_ARCH_OMAP16XX) += gpio16xx.o
-
-endif
diff --git a/arch/arm/mach-omap1/gpio15xx.c b/arch/arm/mach-omap1/gpio15xx.c
index c675f11de99d..61fa26efd865 100644
--- a/arch/arm/mach-omap1/gpio15xx.c
+++ b/arch/arm/mach-omap1/gpio15xx.c
@@ -11,6 +11,7 @@
#include <linux/gpio.h>
#include <linux/platform_data/gpio-omap.h>
#include <linux/soc/ti/omap1-soc.h>
+#include <asm/irq.h>
#include "irqs.h"
diff --git a/arch/arm/mach-omap1/io.c b/arch/arm/mach-omap1/io.c
index d2db9b8aed3f..0074b011a05a 100644
--- a/arch/arm/mach-omap1/io.c
+++ b/arch/arm/mach-omap1/io.c
@@ -22,17 +22,14 @@
* The machine specific code may provide the extra mapping besides the
* default mapping provided here.
*/
-static struct map_desc omap_io_desc[] __initdata = {
+#if defined (CONFIG_ARCH_OMAP730) || defined (CONFIG_ARCH_OMAP850)
+static struct map_desc omap7xx_io_desc[] __initdata = {
{
.virtual = OMAP1_IO_VIRT,
.pfn = __phys_to_pfn(OMAP1_IO_PHYS),
.length = OMAP1_IO_SIZE,
.type = MT_DEVICE
- }
-};
-
-#if defined (CONFIG_ARCH_OMAP730) || defined (CONFIG_ARCH_OMAP850)
-static struct map_desc omap7xx_io_desc[] __initdata = {
+ },
{
.virtual = OMAP7XX_DSP_BASE,
.pfn = __phys_to_pfn(OMAP7XX_DSP_START),
@@ -50,6 +47,12 @@ static struct map_desc omap7xx_io_desc[] __initdata = {
#ifdef CONFIG_ARCH_OMAP15XX
static struct map_desc omap1510_io_desc[] __initdata = {
{
+ .virtual = OMAP1_IO_VIRT,
+ .pfn = __phys_to_pfn(OMAP1_IO_PHYS),
+ .length = OMAP1_IO_SIZE,
+ .type = MT_DEVICE
+ },
+ {
.virtual = OMAP1510_DSP_BASE,
.pfn = __phys_to_pfn(OMAP1510_DSP_START),
.length = OMAP1510_DSP_SIZE,
@@ -66,6 +69,12 @@ static struct map_desc omap1510_io_desc[] __initdata = {
#if defined(CONFIG_ARCH_OMAP16XX)
static struct map_desc omap16xx_io_desc[] __initdata = {
{
+ .virtual = OMAP1_IO_VIRT,
+ .pfn = __phys_to_pfn(OMAP1_IO_PHYS),
+ .length = OMAP1_IO_SIZE,
+ .type = MT_DEVICE
+ },
+ {
.virtual = OMAP16XX_DSP_BASE,
.pfn = __phys_to_pfn(OMAP16XX_DSP_START),
.length = OMAP16XX_DSP_SIZE,
@@ -79,18 +88,9 @@ static struct map_desc omap16xx_io_desc[] __initdata = {
};
#endif
-/*
- * Maps common IO regions for omap1
- */
-static void __init omap1_map_common_io(void)
-{
- iotable_init(omap_io_desc, ARRAY_SIZE(omap_io_desc));
-}
-
#if defined (CONFIG_ARCH_OMAP730) || defined (CONFIG_ARCH_OMAP850)
void __init omap7xx_map_io(void)
{
- omap1_map_common_io();
iotable_init(omap7xx_io_desc, ARRAY_SIZE(omap7xx_io_desc));
}
#endif
@@ -98,7 +98,6 @@ void __init omap7xx_map_io(void)
#ifdef CONFIG_ARCH_OMAP15XX
void __init omap15xx_map_io(void)
{
- omap1_map_common_io();
iotable_init(omap1510_io_desc, ARRAY_SIZE(omap1510_io_desc));
}
#endif
@@ -106,7 +105,6 @@ void __init omap15xx_map_io(void)
#if defined(CONFIG_ARCH_OMAP16XX)
void __init omap16xx_map_io(void)
{
- omap1_map_common_io();
iotable_init(omap16xx_io_desc, ARRAY_SIZE(omap16xx_io_desc));
}
#endif
diff --git a/arch/arm/mach-omap1/mcbsp.c b/arch/arm/mach-omap1/mcbsp.c
index 05c25c432449..b1632cbe37e6 100644
--- a/arch/arm/mach-omap1/mcbsp.c
+++ b/arch/arm/mach-omap1/mcbsp.c
@@ -89,7 +89,6 @@ static struct omap_mcbsp_ops omap1_mcbsp_ops = {
#define OMAP1610_MCBSP2_BASE 0xfffb1000
#define OMAP1610_MCBSP3_BASE 0xe1017000
-#if defined(CONFIG_ARCH_OMAP730) || defined(CONFIG_ARCH_OMAP850)
struct resource omap7xx_mcbsp_res[][6] = {
{
{
@@ -159,14 +158,7 @@ static struct omap_mcbsp_platform_data omap7xx_mcbsp_pdata[] = {
};
#define OMAP7XX_MCBSP_RES_SZ ARRAY_SIZE(omap7xx_mcbsp_res[1])
#define OMAP7XX_MCBSP_COUNT ARRAY_SIZE(omap7xx_mcbsp_res)
-#else
-#define omap7xx_mcbsp_res_0 NULL
-#define omap7xx_mcbsp_pdata NULL
-#define OMAP7XX_MCBSP_RES_SZ 0
-#define OMAP7XX_MCBSP_COUNT 0
-#endif
-#ifdef CONFIG_ARCH_OMAP15XX
struct resource omap15xx_mcbsp_res[][6] = {
{
{
@@ -266,14 +258,7 @@ static struct omap_mcbsp_platform_data omap15xx_mcbsp_pdata[] = {
};
#define OMAP15XX_MCBSP_RES_SZ ARRAY_SIZE(omap15xx_mcbsp_res[1])
#define OMAP15XX_MCBSP_COUNT ARRAY_SIZE(omap15xx_mcbsp_res)
-#else
-#define omap15xx_mcbsp_res_0 NULL
-#define omap15xx_mcbsp_pdata NULL
-#define OMAP15XX_MCBSP_RES_SZ 0
-#define OMAP15XX_MCBSP_COUNT 0
-#endif
-#ifdef CONFIG_ARCH_OMAP16XX
struct resource omap16xx_mcbsp_res[][6] = {
{
{
@@ -373,12 +358,6 @@ static struct omap_mcbsp_platform_data omap16xx_mcbsp_pdata[] = {
};
#define OMAP16XX_MCBSP_RES_SZ ARRAY_SIZE(omap16xx_mcbsp_res[1])
#define OMAP16XX_MCBSP_COUNT ARRAY_SIZE(omap16xx_mcbsp_res)
-#else
-#define omap16xx_mcbsp_res_0 NULL
-#define omap16xx_mcbsp_pdata NULL
-#define OMAP16XX_MCBSP_RES_SZ 0
-#define OMAP16XX_MCBSP_COUNT 0
-#endif
static void omap_mcbsp_register_board_cfg(struct resource *res, int res_count,
struct omap_mcbsp_platform_data *config, int size)
diff --git a/arch/arm/mach-omap1/pm.h b/arch/arm/mach-omap1/pm.h
index d9165709c532..0d1f092821ff 100644
--- a/arch/arm/mach-omap1/pm.h
+++ b/arch/arm/mach-omap1/pm.h
@@ -106,13 +106,6 @@
#define OMAP7XX_IDLECT3 0xfffece24
#define OMAP7XX_IDLE_LOOP_REQUEST 0x0C00
-#if !defined(CONFIG_ARCH_OMAP730) && \
- !defined(CONFIG_ARCH_OMAP850) && \
- !defined(CONFIG_ARCH_OMAP15XX) && \
- !defined(CONFIG_ARCH_OMAP16XX)
-#warning "Power management for this processor not implemented yet"
-#endif
-
#ifndef __ASSEMBLER__
#include <linux/clk.h>
diff --git a/arch/arm/mach-pxa/Kconfig b/arch/arm/mach-pxa/Kconfig
index b90d98bae68d..03e25af6f48c 100644
--- a/arch/arm/mach-pxa/Kconfig
+++ b/arch/arm/mach-pxa/Kconfig
@@ -45,6 +45,8 @@ config MACH_PXA27X_DT
config MACH_PXA3XX_DT
bool "Support PXA3xx platforms from device tree"
select CPU_PXA300
+ select CPU_PXA310
+ select CPU_PXA320
select PINCTRL
select POWER_SUPPLY
select PXA3xx
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
index c1494a4dee25..53f2d8774fdb 100644
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -161,7 +161,7 @@ void __init paging_init(const struct machine_desc *mdesc)
mpu_setup();
/* allocate the zero page. */
- zero_page = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+ zero_page = (void *)memblock_alloc(PAGE_SIZE, PAGE_SIZE);
if (!zero_page)
panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
__func__, PAGE_SIZE, PAGE_SIZE);
diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index fa6999e24b07..e43f6d716b4b 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -6,6 +6,7 @@
* VM_EXEC
*/
#include <asm/asm-offsets.h>
+#include <asm/pgtable.h>
#include <asm/thread_info.h>
#ifdef CONFIG_CPU_V7M
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 03934808b2ed..c5ccca26a408 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -184,8 +184,6 @@ config ARM64
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
- select HAVE_DYNAMIC_FTRACE_WITH_ARGS \
- if $(cc-option,-fpatchable-function-entry=2)
select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY \
if DYNAMIC_FTRACE_WITH_ARGS
select HAVE_EFFICIENT_UNALIGNED_ACCESS
@@ -972,6 +970,22 @@ config ARM64_ERRATUM_2457168
If unsure, say Y.
+config ARM64_ERRATUM_2645198
+ bool "Cortex-A715: 2645198: Workaround possible [ESR|FAR]_ELx corruption"
+ default y
+ help
+ This option adds the workaround for ARM Cortex-A715 erratum 2645198.
+
+ If a Cortex-A715 cpu sees a page mapping permissions change from executable
+ to non-executable, it may corrupt the ESR_ELx and FAR_ELx registers on the
+ next instruction abort caused by permission fault.
+
+ Only user-space does executable to non-executable permission transition via
+ mprotect() system call. Workaround the problem by doing a break-before-make
+ TLB invalidation, for all changes to executable user space mappings.
+
+ If unsure, say Y.
+
config CAVIUM_ERRATUM_22375
bool "Cavium erratum 22375, 24313"
default y
diff --git a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi
index 1648e67afbb6..417523dc4cc0 100644
--- a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi
@@ -1886,7 +1886,7 @@
sd_emmc_b: sd@5000 {
compatible = "amlogic,meson-axg-mmc";
reg = <0x0 0x5000 0x0 0x800>;
- interrupts = <GIC_SPI 217 IRQ_TYPE_EDGE_RISING>;
+ interrupts = <GIC_SPI 217 IRQ_TYPE_LEVEL_HIGH>;
status = "disabled";
clocks = <&clkc CLKID_SD_EMMC_B>,
<&clkc CLKID_SD_EMMC_B_CLK0>,
@@ -1898,7 +1898,7 @@
sd_emmc_c: mmc@7000 {
compatible = "amlogic,meson-axg-mmc";
reg = <0x0 0x7000 0x0 0x800>;
- interrupts = <GIC_SPI 218 IRQ_TYPE_EDGE_RISING>;
+ interrupts = <GIC_SPI 218 IRQ_TYPE_LEVEL_HIGH>;
status = "disabled";
clocks = <&clkc CLKID_SD_EMMC_C>,
<&clkc CLKID_SD_EMMC_C_CLK0>,
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi
index 9dbd50820b1c..7f55d97f6c28 100644
--- a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi
@@ -2324,7 +2324,7 @@
sd_emmc_a: sd@ffe03000 {
compatible = "amlogic,meson-axg-mmc";
reg = <0x0 0xffe03000 0x0 0x800>;
- interrupts = <GIC_SPI 189 IRQ_TYPE_EDGE_RISING>;
+ interrupts = <GIC_SPI 189 IRQ_TYPE_LEVEL_HIGH>;
status = "disabled";
clocks = <&clkc CLKID_SD_EMMC_A>,
<&clkc CLKID_SD_EMMC_A_CLK0>,
@@ -2336,7 +2336,7 @@
sd_emmc_b: sd@ffe05000 {
compatible = "amlogic,meson-axg-mmc";
reg = <0x0 0xffe05000 0x0 0x800>;
- interrupts = <GIC_SPI 190 IRQ_TYPE_EDGE_RISING>;
+ interrupts = <GIC_SPI 190 IRQ_TYPE_LEVEL_HIGH>;
status = "disabled";
clocks = <&clkc CLKID_SD_EMMC_B>,
<&clkc CLKID_SD_EMMC_B_CLK0>,
@@ -2348,7 +2348,7 @@
sd_emmc_c: mmc@ffe07000 {
compatible = "amlogic,meson-axg-mmc";
reg = <0x0 0xffe07000 0x0 0x800>;
- interrupts = <GIC_SPI 191 IRQ_TYPE_EDGE_RISING>;
+ interrupts = <GIC_SPI 191 IRQ_TYPE_LEVEL_HIGH>;
status = "disabled";
clocks = <&clkc CLKID_SD_EMMC_C>,
<&clkc CLKID_SD_EMMC_C_CLK0>,
diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi
index e3c12e0be99d..5eed15035b67 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi
@@ -603,21 +603,21 @@
sd_emmc_a: mmc@70000 {
compatible = "amlogic,meson-gx-mmc", "amlogic,meson-gxbb-mmc";
reg = <0x0 0x70000 0x0 0x800>;
- interrupts = <GIC_SPI 216 IRQ_TYPE_EDGE_RISING>;
+ interrupts = <GIC_SPI 216 IRQ_TYPE_LEVEL_HIGH>;
status = "disabled";
};
sd_emmc_b: mmc@72000 {
compatible = "amlogic,meson-gx-mmc", "amlogic,meson-gxbb-mmc";
reg = <0x0 0x72000 0x0 0x800>;
- interrupts = <GIC_SPI 217 IRQ_TYPE_EDGE_RISING>;
+ interrupts = <GIC_SPI 217 IRQ_TYPE_LEVEL_HIGH>;
status = "disabled";
};
sd_emmc_c: mmc@74000 {
compatible = "amlogic,meson-gx-mmc", "amlogic,meson-gxbb-mmc";
reg = <0x0 0x74000 0x0 0x800>;
- interrupts = <GIC_SPI 218 IRQ_TYPE_EDGE_RISING>;
+ interrupts = <GIC_SPI 218 IRQ_TYPE_LEVEL_HIGH>;
status = "disabled";
};
};
diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-odroid-hc4.dts b/arch/arm64/boot/dts/amlogic/meson-sm1-odroid-hc4.dts
index e3486f60645a..a1f0c38ccadd 100644
--- a/arch/arm64/boot/dts/amlogic/meson-sm1-odroid-hc4.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-sm1-odroid-hc4.dts
@@ -131,10 +131,6 @@
};
&usb {
- phys = <&usb2_phy1>;
- phy-names = "usb2-phy1";
-};
-
-&usb2_phy0 {
- status = "disabled";
+ phys = <&usb2_phy0>, <&usb2_phy1>;
+ phy-names = "usb2-phy0", "usb2-phy1";
};
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1012a-qds.dts b/arch/arm64/boot/dts/freescale/fsl-ls1012a-qds.dts
index 5a8d85a7d161..bbdf989058ff 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1012a-qds.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1012a-qds.dts
@@ -110,7 +110,7 @@
&i2c0 {
status = "okay";
- pca9547@77 {
+ i2c-mux@77 {
compatible = "nxp,pca9547";
reg = <0x77>;
#address-cells = <1>;
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a-qds.dts b/arch/arm64/boot/dts/freescale/fsl-ls1043a-qds.dts
index 9b726c2a4842..dda27ed7aaf2 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1043a-qds.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a-qds.dts
@@ -89,7 +89,7 @@
&i2c0 {
status = "okay";
- pca9547@77 {
+ i2c-mux@77 {
compatible = "nxp,pca9547";
reg = <0x77>;
#address-cells = <1>;
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a-qds.dts b/arch/arm64/boot/dts/freescale/fsl-ls1046a-qds.dts
index b2fcbba60d3a..3b0ed9305f2b 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1046a-qds.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a-qds.dts
@@ -88,7 +88,7 @@
&i2c0 {
status = "okay";
- pca9547@77 {
+ i2c-mux@77 {
compatible = "nxp,pca9547";
reg = <0x77>;
#address-cells = <1>;
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a-qds.dts b/arch/arm64/boot/dts/freescale/fsl-ls1088a-qds.dts
index 41d8b15f25a5..aa52ff73ff9e 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1088a-qds.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a-qds.dts
@@ -53,7 +53,7 @@
&i2c0 {
status = "okay";
- i2c-switch@77 {
+ i2c-mux@77 {
compatible = "nxp,pca9547";
reg = <0x77>;
#address-cells = <1>;
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-ls1088a-rdb.dts
index 1bfbce69cc8b..ee8e932628d1 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1088a-rdb.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a-rdb.dts
@@ -136,7 +136,7 @@
&i2c0 {
status = "okay";
- i2c-switch@77 {
+ i2c-mux@77 {
compatible = "nxp,pca9547";
reg = <0x77>;
#address-cells = <1>;
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a-ten64.dts b/arch/arm64/boot/dts/freescale/fsl-ls1088a-ten64.dts
index ef6c8967533e..d4867d6cf47c 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1088a-ten64.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a-ten64.dts
@@ -245,7 +245,7 @@
&i2c3 {
status = "okay";
- i2c-switch@70 {
+ i2c-mux@70 {
compatible = "nxp,pca9540";
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa-qds.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa-qds.dtsi
index f598669e742f..52c5a43b30a0 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls208xa-qds.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa-qds.dtsi
@@ -103,7 +103,7 @@
&i2c0 {
status = "okay";
- pca9547@77 {
+ i2c-mux@77 {
compatible = "nxp,pca9547";
reg = <0x77>;
#address-cells = <1>;
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa-rdb.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa-rdb.dtsi
index 3d9647b3da14..537cecb13dd0 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls208xa-rdb.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa-rdb.dtsi
@@ -44,7 +44,7 @@
&i2c0 {
status = "okay";
- pca9547@75 {
+ i2c-mux@75 {
compatible = "nxp,pca9547";
reg = <0x75>;
#address-cells = <1>;
diff --git a/arch/arm64/boot/dts/freescale/fsl-lx2160a-cex7.dtsi b/arch/arm64/boot/dts/freescale/fsl-lx2160a-cex7.dtsi
index afb455210bd0..d32a52ab00a4 100644
--- a/arch/arm64/boot/dts/freescale/fsl-lx2160a-cex7.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-lx2160a-cex7.dtsi
@@ -54,7 +54,7 @@
&i2c0 {
status = "okay";
- i2c-switch@77 {
+ i2c-mux@77 {
compatible = "nxp,pca9547";
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/freescale/imx8dxl.dtsi b/arch/arm64/boot/dts/freescale/imx8dxl.dtsi
index 0c64b9194621..214f21bd0cb4 100644
--- a/arch/arm64/boot/dts/freescale/imx8dxl.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8dxl.dtsi
@@ -164,7 +164,7 @@
sc_pwrkey: keys {
compatible = "fsl,imx8qxp-sc-key", "fsl,imx-sc-key";
- linux,keycode = <KEY_POWER>;
+ linux,keycodes = <KEY_POWER>;
wakeup-source;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-beacon-baseboard.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-beacon-baseboard.dtsi
index 03266bd90a06..169f047fbca5 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-beacon-baseboard.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-beacon-baseboard.dtsi
@@ -120,7 +120,7 @@
&ecspi2 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_espi2>;
- cs-gpios = <&gpio5 9 GPIO_ACTIVE_LOW>;
+ cs-gpios = <&gpio5 13 GPIO_ACTIVE_LOW>;
status = "okay";
eeprom@0 {
@@ -316,7 +316,7 @@
MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0x82
MX8MM_IOMUXC_ECSPI2_MOSI_ECSPI2_MOSI 0x82
MX8MM_IOMUXC_ECSPI2_MISO_ECSPI2_MISO 0x82
- MX8MM_IOMUXC_ECSPI1_SS0_GPIO5_IO9 0x41
+ MX8MM_IOMUXC_ECSPI2_SS0_GPIO5_IO13 0x41
>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-data-modul-edm-sbc.dts b/arch/arm64/boot/dts/freescale/imx8mm-data-modul-edm-sbc.dts
index 24f61db33eba..9889319d4f04 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-data-modul-edm-sbc.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mm-data-modul-edm-sbc.dts
@@ -88,6 +88,7 @@
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_watchdog_gpio>;
compatible = "linux,wdt-gpio";
+ always-running;
gpios = <&gpio1 8 GPIO_ACTIVE_HIGH>;
hw_algo = "level";
/* Reset triggers in 2..3 seconds */
@@ -275,7 +276,7 @@
compatible = "rohm,bd71847";
reg = <0x4b>;
#clock-cells = <0>;
- clocks = <&clk_xtal32k 0>;
+ clocks = <&clk_xtal32k>;
clock-output-names = "clk-32k-out";
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_pmic>;
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-nitrogen-r2.dts b/arch/arm64/boot/dts/freescale/imx8mm-nitrogen-r2.dts
index 74c09891600f..6357078185ed 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-nitrogen-r2.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mm-nitrogen-r2.dts
@@ -214,7 +214,7 @@
pinctrl-0 = <&pinctrl_i2c3>;
status = "okay";
- i2cmux@70 {
+ i2c-mux@70 {
compatible = "nxp,pca9540";
reg = <0x70>;
#address-cells = <1>;
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-pinfunc.h b/arch/arm64/boot/dts/freescale/imx8mm-pinfunc.h
index 83c8f715cd90..b1f11098d248 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-pinfunc.h
+++ b/arch/arm64/boot/dts/freescale/imx8mm-pinfunc.h
@@ -602,7 +602,7 @@
#define MX8MM_IOMUXC_UART1_RXD_GPIO5_IO22 0x234 0x49C 0x000 0x5 0x0
#define MX8MM_IOMUXC_UART1_RXD_TPSMP_HDATA24 0x234 0x49C 0x000 0x7 0x0
#define MX8MM_IOMUXC_UART1_TXD_UART1_DCE_TX 0x238 0x4A0 0x000 0x0 0x0
-#define MX8MM_IOMUXC_UART1_TXD_UART1_DTE_RX 0x238 0x4A0 0x4F4 0x0 0x0
+#define MX8MM_IOMUXC_UART1_TXD_UART1_DTE_RX 0x238 0x4A0 0x4F4 0x0 0x1
#define MX8MM_IOMUXC_UART1_TXD_ECSPI3_MOSI 0x238 0x4A0 0x000 0x1 0x0
#define MX8MM_IOMUXC_UART1_TXD_GPIO5_IO23 0x238 0x4A0 0x000 0x5 0x0
#define MX8MM_IOMUXC_UART1_TXD_TPSMP_HDATA25 0x238 0x4A0 0x000 0x7 0x0
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx-0x-rs232-rts.dtso b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx-0x-rs232-rts.dtso
index 3ea73a6886ff..f6ad1a4b8b66 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx-0x-rs232-rts.dtso
+++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx-0x-rs232-rts.dtso
@@ -33,7 +33,6 @@
pinctrl-0 = <&pinctrl_uart2>;
rts-gpios = <&gpio5 29 GPIO_ACTIVE_LOW>;
cts-gpios = <&gpio5 28 GPIO_ACTIVE_LOW>;
- uart-has-rtscts;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx-0x-rs232-rts.dtso b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx-0x-rs232-rts.dtso
index 2fa635e1c1a8..1f8ea20dfafc 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx-0x-rs232-rts.dtso
+++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx-0x-rs232-rts.dtso
@@ -33,7 +33,6 @@
pinctrl-0 = <&pinctrl_uart2>;
rts-gpios = <&gpio5 29 GPIO_ACTIVE_LOW>;
cts-gpios = <&gpio5 28 GPIO_ACTIVE_LOW>;
- uart-has-rtscts;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi
index 244ef8d6cc68..7761d5671cb1 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi
@@ -222,7 +222,6 @@
pinctrl-0 = <&pinctrl_uart3>, <&pinctrl_bten>;
cts-gpios = <&gpio5 8 GPIO_ACTIVE_LOW>;
rts-gpios = <&gpio5 9 GPIO_ACTIVE_LOW>;
- uart-has-rtscts;
status = "okay";
bluetooth {
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7901.dts b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7901.dts
index 750a1f07ecb7..64b366e83fa1 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7901.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7901.dts
@@ -733,7 +733,6 @@
dtr-gpios = <&gpio1 14 GPIO_ACTIVE_LOW>;
dsr-gpios = <&gpio1 1 GPIO_ACTIVE_LOW>;
dcd-gpios = <&gpio1 11 GPIO_ACTIVE_LOW>;
- uart-has-rtscts;
status = "okay";
};
@@ -749,7 +748,6 @@
pinctrl-0 = <&pinctrl_uart3>, <&pinctrl_uart3_gpio>;
cts-gpios = <&gpio4 10 GPIO_ACTIVE_LOW>;
rts-gpios = <&gpio4 9 GPIO_ACTIVE_LOW>;
- uart-has-rtscts;
status = "okay";
};
@@ -758,7 +756,6 @@
pinctrl-0 = <&pinctrl_uart4>, <&pinctrl_uart4_gpio>;
cts-gpios = <&gpio5 11 GPIO_ACTIVE_LOW>;
rts-gpios = <&gpio5 12 GPIO_ACTIVE_LOW>;
- uart-has-rtscts;
status = "okay";
};
@@ -771,6 +768,7 @@
&usbotg2 {
dr_mode = "host";
vbus-supply = <&reg_usb2_vbus>;
+ over-current-active-low;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7902.dts b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7902.dts
index 32872b0b1aaf..e8bc1fccc47b 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7902.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7902.dts
@@ -664,7 +664,6 @@
pinctrl-0 = <&pinctrl_uart1>, <&pinctrl_uart1_gpio>;
rts-gpios = <&gpio4 10 GPIO_ACTIVE_LOW>;
cts-gpios = <&gpio4 24 GPIO_ACTIVE_LOW>;
- uart-has-rtscts;
status = "okay";
};
@@ -681,7 +680,6 @@
pinctrl-0 = <&pinctrl_uart3>, <&pinctrl_uart3_gpio>;
rts-gpios = <&gpio2 1 GPIO_ACTIVE_LOW>;
cts-gpios = <&gpio2 0 GPIO_ACTIVE_LOW>;
- uart-has-rtscts;
status = "okay";
bluetooth {
@@ -699,7 +697,6 @@
dtr-gpios = <&gpio4 3 GPIO_ACTIVE_LOW>;
dsr-gpios = <&gpio4 4 GPIO_ACTIVE_LOW>;
dcd-gpios = <&gpio4 6 GPIO_ACTIVE_LOW>;
- uart-has-rtscts;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dts b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dts
index 8ce562246a08..acc2ba8e00a8 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dts
@@ -581,7 +581,6 @@
dtr-gpios = <&gpio1 0 GPIO_ACTIVE_LOW>;
dsr-gpios = <&gpio1 1 GPIO_ACTIVE_LOW>;
dcd-gpios = <&gpio3 24 GPIO_ACTIVE_LOW>;
- uart-has-rtscts;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-verdin-dahlia.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-verdin-dahlia.dtsi
index c2a5c2f7b204..7c3f5c54f040 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-verdin-dahlia.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-verdin-dahlia.dtsi
@@ -9,6 +9,7 @@
simple-audio-card,bitclock-master = <&dailink_master>;
simple-audio-card,format = "i2s";
simple-audio-card,frame-master = <&dailink_master>;
+ simple-audio-card,mclk-fs = <256>;
simple-audio-card,name = "imx8mm-wm8904";
simple-audio-card,routing =
"Headphone Jack", "HPOUTL",
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-verdin-dev.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-verdin-dev.dtsi
index 73cc3fafa018..b2bcd2282170 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-verdin-dev.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-verdin-dev.dtsi
@@ -11,6 +11,7 @@
simple-audio-card,bitclock-master = <&dailink_master>;
simple-audio-card,format = "i2s";
simple-audio-card,frame-master = <&dailink_master>;
+ simple-audio-card,mclk-fs = <256>;
simple-audio-card,name = "imx8mm-nau8822";
simple-audio-card,routing =
"Headphones", "LHP",
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi
index 0d454e0e2f7c..702d87621bb4 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi
@@ -98,6 +98,7 @@
off-on-delay = <500000>;
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_reg_eth>;
+ regulator-always-on;
regulator-boot-on;
regulator-max-microvolt = <3300000>;
regulator-min-microvolt = <3300000>;
diff --git a/arch/arm64/boot/dts/freescale/imx8mn-venice-gw7902.dts b/arch/arm64/boot/dts/freescale/imx8mn-venice-gw7902.dts
index b9444e4a3d2d..7c12518dbc96 100644
--- a/arch/arm64/boot/dts/freescale/imx8mn-venice-gw7902.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mn-venice-gw7902.dts
@@ -643,7 +643,6 @@
pinctrl-0 = <&pinctrl_uart3>, <&pinctrl_uart3_gpio>;
rts-gpios = <&gpio2 1 GPIO_ACTIVE_LOW>;
cts-gpios = <&gpio2 0 GPIO_ACTIVE_LOW>;
- uart-has-rtscts;
status = "okay";
bluetooth {
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts
index d4c7ca16abd0..f2d93437084b 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts
@@ -36,8 +36,8 @@
pcie0_refclk: pcie0-refclk {
compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <100000000>;
+ #clock-cells = <0>;
+ clock-frequency = <100000000>;
};
reg_can1_stby: regulator-can1-stby {
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-phycore-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-phycore-som.dtsi
index 79b290a002c1..ecc4bce6db97 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-phycore-som.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-phycore-som.dtsi
@@ -99,7 +99,6 @@
regulators {
buck1: BUCK1 {
- regulator-compatible = "BUCK1";
regulator-min-microvolt = <600000>;
regulator-max-microvolt = <2187500>;
regulator-boot-on;
@@ -108,7 +107,6 @@
};
buck2: BUCK2 {
- regulator-compatible = "BUCK2";
regulator-min-microvolt = <600000>;
regulator-max-microvolt = <2187500>;
regulator-boot-on;
@@ -119,7 +117,6 @@
};
buck4: BUCK4 {
- regulator-compatible = "BUCK4";
regulator-min-microvolt = <600000>;
regulator-max-microvolt = <3400000>;
regulator-boot-on;
@@ -127,7 +124,6 @@
};
buck5: BUCK5 {
- regulator-compatible = "BUCK5";
regulator-min-microvolt = <600000>;
regulator-max-microvolt = <3400000>;
regulator-boot-on;
@@ -135,7 +131,6 @@
};
buck6: BUCK6 {
- regulator-compatible = "BUCK6";
regulator-min-microvolt = <600000>;
regulator-max-microvolt = <3400000>;
regulator-boot-on;
@@ -143,7 +138,6 @@
};
ldo1: LDO1 {
- regulator-compatible = "LDO1";
regulator-min-microvolt = <1600000>;
regulator-max-microvolt = <3300000>;
regulator-boot-on;
@@ -151,7 +145,6 @@
};
ldo2: LDO2 {
- regulator-compatible = "LDO2";
regulator-min-microvolt = <800000>;
regulator-max-microvolt = <1150000>;
regulator-boot-on;
@@ -159,7 +152,6 @@
};
ldo3: LDO3 {
- regulator-compatible = "LDO3";
regulator-min-microvolt = <800000>;
regulator-max-microvolt = <3300000>;
regulator-boot-on;
@@ -167,13 +159,11 @@
};
ldo4: LDO4 {
- regulator-compatible = "LDO4";
regulator-min-microvolt = <800000>;
regulator-max-microvolt = <3300000>;
};
ldo5: LDO5 {
- regulator-compatible = "LDO5";
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <3300000>;
regulator-boot-on;
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts
index ceeca4966fc5..8eb7d5ee38da 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts
@@ -623,7 +623,6 @@
pinctrl-0 = <&pinctrl_uart3>, <&pinctrl_uart3_gpio>;
cts-gpios = <&gpio3 21 GPIO_ACTIVE_LOW>;
rts-gpios = <&gpio3 22 GPIO_ACTIVE_LOW>;
- uart-has-rtscts;
status = "okay";
bluetooth {
diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi
index 7a6e6221f421..03034b439c1f 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi
@@ -524,6 +524,7 @@
compatible = "fsl,imx8mp-gpc";
reg = <0x303a0000 0x1000>;
interrupt-parent = <&gic>;
+ interrupts = <GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>;
interrupt-controller;
#interrupt-cells = <3>;
@@ -590,7 +591,7 @@
reg = <IMX8MP_POWER_DOMAIN_MIPI_PHY2>;
};
- pgc_hsiomix: power-domains@17 {
+ pgc_hsiomix: power-domain@17 {
#power-domain-cells = <0>;
reg = <IMX8MP_POWER_DOMAIN_HSIOMIX>;
clocks = <&clk IMX8MP_CLK_HSIO_AXI>,
@@ -1297,7 +1298,7 @@
reg = <0x32f10100 0x8>,
<0x381f0000 0x20>;
clocks = <&clk IMX8MP_CLK_HSIO_ROOT>,
- <&clk IMX8MP_CLK_USB_ROOT>;
+ <&clk IMX8MP_CLK_USB_SUSP>;
clock-names = "hsio", "suspend";
interrupts = <GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>;
power-domains = <&hsio_blk_ctrl IMX8MP_HSIOBLK_PD_USB>;
@@ -1310,9 +1311,9 @@
usb_dwc3_0: usb@38100000 {
compatible = "snps,dwc3";
reg = <0x38100000 0x10000>;
- clocks = <&clk IMX8MP_CLK_HSIO_AXI>,
+ clocks = <&clk IMX8MP_CLK_USB_ROOT>,
<&clk IMX8MP_CLK_USB_CORE_REF>,
- <&clk IMX8MP_CLK_USB_ROOT>;
+ <&clk IMX8MP_CLK_USB_SUSP>;
clock-names = "bus_early", "ref", "suspend";
interrupts = <GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>;
phys = <&usb3_phy0>, <&usb3_phy0>;
@@ -1339,7 +1340,7 @@
reg = <0x32f10108 0x8>,
<0x382f0000 0x20>;
clocks = <&clk IMX8MP_CLK_HSIO_ROOT>,
- <&clk IMX8MP_CLK_USB_ROOT>;
+ <&clk IMX8MP_CLK_USB_SUSP>;
clock-names = "hsio", "suspend";
interrupts = <GIC_SPI 149 IRQ_TYPE_LEVEL_HIGH>;
power-domains = <&hsio_blk_ctrl IMX8MP_HSIOBLK_PD_USB>;
@@ -1352,9 +1353,9 @@
usb_dwc3_1: usb@38200000 {
compatible = "snps,dwc3";
reg = <0x38200000 0x10000>;
- clocks = <&clk IMX8MP_CLK_HSIO_AXI>,
+ clocks = <&clk IMX8MP_CLK_USB_ROOT>,
<&clk IMX8MP_CLK_USB_CORE_REF>,
- <&clk IMX8MP_CLK_USB_ROOT>;
+ <&clk IMX8MP_CLK_USB_SUSP>;
clock-names = "bus_early", "ref", "suspend";
interrupts = <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>;
phys = <&usb3_phy1>, <&usb3_phy1>;
diff --git a/arch/arm64/boot/dts/freescale/imx8mq-nitrogen.dts b/arch/arm64/boot/dts/freescale/imx8mq-nitrogen.dts
index 9dda2a1554c3..8614c18b5998 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq-nitrogen.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mq-nitrogen.dts
@@ -133,7 +133,7 @@
pinctrl-0 = <&pinctrl_i2c1>;
status = "okay";
- i2cmux@70 {
+ i2c-mux@70 {
compatible = "nxp,pca9546";
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_i2c1_pca9546>;
@@ -216,7 +216,7 @@
pinctrl-0 = <&pinctrl_i2c4>;
status = "okay";
- pca9546: i2cmux@70 {
+ pca9546: i2c-mux@70 {
compatible = "nxp,pca9546";
reg = <0x70>;
#address-cells = <1>;
diff --git a/arch/arm64/boot/dts/freescale/imx8mq-thor96.dts b/arch/arm64/boot/dts/freescale/imx8mq-thor96.dts
index 5d5aa6537225..6e6182709d22 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq-thor96.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mq-thor96.dts
@@ -339,7 +339,7 @@
bus-width = <4>;
non-removable;
no-sd;
- no-emmc;
+ no-mmc;
status = "okay";
brcmf: wifi@1 {
@@ -359,7 +359,7 @@
cd-gpios = <&gpio2 12 GPIO_ACTIVE_LOW>;
bus-width = <4>;
no-sdio;
- no-emmc;
+ no-mmc;
disable-wp;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts b/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts
index 07d8dd8160f6..afa883389456 100644
--- a/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts
+++ b/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts
@@ -61,7 +61,7 @@
pinctrl-0 = <&pinctrl_lpi2c1 &pinctrl_ioexp_rst>;
status = "okay";
- i2c-switch@71 {
+ i2c-mux@71 {
compatible = "nxp,pca9646", "nxp,pca9546";
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts b/arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts
index 69786c326db0..27f9a9f33134 100644
--- a/arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts
+++ b/arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts
@@ -74,7 +74,7 @@
pinctrl_usdhc1: usdhc1grp {
fsl,pins = <
- MX93_PAD_SD1_CLK__USDHC1_CLK 0x17fe
+ MX93_PAD_SD1_CLK__USDHC1_CLK 0x15fe
MX93_PAD_SD1_CMD__USDHC1_CMD 0x13fe
MX93_PAD_SD1_DATA0__USDHC1_DATA0 0x13fe
MX93_PAD_SD1_DATA1__USDHC1_DATA1 0x13fe
@@ -84,7 +84,7 @@
MX93_PAD_SD1_DATA5__USDHC1_DATA5 0x13fe
MX93_PAD_SD1_DATA6__USDHC1_DATA6 0x13fe
MX93_PAD_SD1_DATA7__USDHC1_DATA7 0x13fe
- MX93_PAD_SD1_STROBE__USDHC1_STROBE 0x17fe
+ MX93_PAD_SD1_STROBE__USDHC1_STROBE 0x15fe
>;
};
@@ -102,7 +102,7 @@
pinctrl_usdhc2: usdhc2grp {
fsl,pins = <
- MX93_PAD_SD2_CLK__USDHC2_CLK 0x17fe
+ MX93_PAD_SD2_CLK__USDHC2_CLK 0x15fe
MX93_PAD_SD2_CMD__USDHC2_CMD 0x13fe
MX93_PAD_SD2_DATA0__USDHC2_DATA0 0x13fe
MX93_PAD_SD2_DATA1__USDHC2_DATA1 0x13fe
diff --git a/arch/arm64/boot/dts/marvell/ac5-98dx25xx.dtsi b/arch/arm64/boot/dts/marvell/ac5-98dx25xx.dtsi
index 7308f7b6b22c..8bce64069138 100644
--- a/arch/arm64/boot/dts/marvell/ac5-98dx25xx.dtsi
+++ b/arch/arm64/boot/dts/marvell/ac5-98dx25xx.dtsi
@@ -98,7 +98,7 @@
uart1: serial@12100 {
compatible = "snps,dw-apb-uart";
- reg = <0x11000 0x100>;
+ reg = <0x12100 0x100>;
reg-shift = <2>;
interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>;
reg-io-width = <1>;
diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi
index 5d31536f4c48..c10cfeb1214d 100644
--- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi
@@ -2146,7 +2146,7 @@
};
vdosys0: syscon@1c01a000 {
- compatible = "mediatek,mt8195-mmsys", "syscon";
+ compatible = "mediatek,mt8195-vdosys0", "mediatek,mt8195-mmsys", "syscon";
reg = <0 0x1c01a000 0 0x1000>;
mboxes = <&gce0 0 CMDQ_THR_PRIO_4>;
#clock-cells = <1>;
@@ -2292,7 +2292,7 @@
};
vdosys1: syscon@1c100000 {
- compatible = "mediatek,mt8195-mmsys", "syscon";
+ compatible = "mediatek,mt8195-vdosys1", "syscon";
reg = <0 0x1c100000 0 0x1000>;
#clock-cells = <1>;
};
diff --git a/arch/arm64/boot/dts/qcom/msm8992-lg-bullhead.dtsi b/arch/arm64/boot/dts/qcom/msm8992-lg-bullhead.dtsi
index 87c90e93667f..79de9cc395c4 100644
--- a/arch/arm64/boot/dts/qcom/msm8992-lg-bullhead.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8992-lg-bullhead.dtsi
@@ -3,6 +3,7 @@
* Copyright (c) 2015, LGE Inc. All rights reserved.
* Copyright (c) 2016, The Linux Foundation. All rights reserved.
* Copyright (c) 2021, Petr Vorel <petr.vorel@gmail.com>
+ * Copyright (c) 2022, Dominik Kobinski <dominikkobinski314@gmail.com>
*/
/dts-v1/;
@@ -51,6 +52,11 @@
reg = <0 0x03400000 0 0x1200000>;
no-map;
};
+
+ removed_region: reserved@5000000 {
+ reg = <0 0x05000000 0 0x2200000>;
+ no-map;
+ };
};
};
diff --git a/arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts b/arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts
index b242c272d2af..fcca1ba94da6 100644
--- a/arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts
+++ b/arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts
@@ -11,6 +11,12 @@
#include <dt-bindings/gpio/gpio.h>
#include <dt-bindings/input/gpio-keys.h>
+/delete-node/ &adsp_mem;
+/delete-node/ &audio_mem;
+/delete-node/ &mpss_mem;
+/delete-node/ &peripheral_region;
+/delete-node/ &rmtfs_mem;
+
/ {
model = "Xiaomi Mi 4C";
compatible = "xiaomi,libra", "qcom,msm8992";
@@ -70,25 +76,67 @@
#size-cells = <2>;
ranges;
- /* This is for getting crash logs using Android downstream kernels */
- ramoops@dfc00000 {
- compatible = "ramoops";
- reg = <0x0 0xdfc00000 0x0 0x40000>;
- console-size = <0x10000>;
- record-size = <0x10000>;
- ftrace-size = <0x10000>;
- pmsg-size = <0x20000>;
+ memory_hole: hole@6400000 {
+ reg = <0 0x06400000 0 0x600000>;
+ no-map;
+ };
+
+ memory_hole2: hole2@6c00000 {
+ reg = <0 0x06c00000 0 0x2400000>;
+ no-map;
+ };
+
+ mpss_mem: mpss@9000000 {
+ reg = <0 0x09000000 0 0x5a00000>;
+ no-map;
+ };
+
+ tzapp: tzapp@ea00000 {
+ reg = <0 0x0ea00000 0 0x1900000>;
+ no-map;
+ };
+
+ mdm_rfsa_mem: mdm-rfsa@ca0b0000 {
+ reg = <0 0xca0b0000 0 0x10000>;
+ no-map;
+ };
+
+ rmtfs_mem: rmtfs@ca100000 {
+ compatible = "qcom,rmtfs-mem";
+ reg = <0 0xca100000 0 0x180000>;
+ no-map;
+
+ qcom,client-id = <1>;
};
- modem_region: modem_region@9000000 {
- reg = <0x0 0x9000000 0x0 0x5a00000>;
+ audio_mem: audio@cb400000 {
+ reg = <0 0xcb000000 0 0x400000>;
+ no-mem;
+ };
+
+ qseecom_mem: qseecom@cb400000 {
+ reg = <0 0xcb400000 0 0x1c00000>;
+ no-mem;
+ };
+
+ adsp_rfsa_mem: adsp-rfsa@cd000000 {
+ reg = <0 0xcd000000 0 0x10000>;
no-map;
};
- tzapp: modem_region@ea00000 {
- reg = <0x0 0xea00000 0x0 0x1900000>;
+ sensor_rfsa_mem: sensor-rfsa@cd010000 {
+ reg = <0 0xcd010000 0 0x10000>;
no-map;
};
+
+ ramoops@dfc00000 {
+ compatible = "ramoops";
+ reg = <0 0xdfc00000 0 0x40000>;
+ console-size = <0x10000>;
+ record-size = <0x10000>;
+ ftrace-size = <0x10000>;
+ pmsg-size = <0x20000>;
+ };
};
};
@@ -130,11 +178,6 @@
status = "okay";
};
-&peripheral_region {
- reg = <0x0 0x7400000 0x0 0x1c00000>;
- no-map;
-};
-
&pm8994_spmi_regulators {
VDD_APC0: s8 {
regulator-min-microvolt = <680000>;
diff --git a/arch/arm64/boot/dts/qcom/msm8992.dtsi b/arch/arm64/boot/dts/qcom/msm8992.dtsi
index 10adb4986ef1..02fc3795dbfd 100644
--- a/arch/arm64/boot/dts/qcom/msm8992.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8992.dtsi
@@ -37,10 +37,6 @@
compatible = "qcom,rpmcc-msm8992", "qcom,rpmcc";
};
-&tcsr_mutex {
- compatible = "qcom,sfpb-mutex";
-};
-
&timer {
interrupts = <GIC_PPI 2 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
<GIC_PPI 3 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
diff --git a/arch/arm64/boot/dts/qcom/msm8994-huawei-angler-rev-101.dts b/arch/arm64/boot/dts/qcom/msm8994-huawei-angler-rev-101.dts
index 85abff0e9b3f..7b0f62144c3e 100644
--- a/arch/arm64/boot/dts/qcom/msm8994-huawei-angler-rev-101.dts
+++ b/arch/arm64/boot/dts/qcom/msm8994-huawei-angler-rev-101.dts
@@ -9,9 +9,6 @@
#include "msm8994.dtsi"
-/* Angler's firmware does not report where the memory is allocated */
-/delete-node/ &cont_splash_mem;
-
/ {
model = "Huawei Nexus 6P";
compatible = "huawei,angler", "qcom,msm8994";
@@ -28,6 +25,22 @@
chosen {
stdout-path = "serial0:115200n8";
};
+
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ tzapp_mem: tzapp@4800000 {
+ reg = <0 0x04800000 0 0x1900000>;
+ no-map;
+ };
+
+ removed_region: reserved@6300000 {
+ reg = <0 0x06300000 0 0xD00000>;
+ no-map;
+ };
+ };
};
&blsp1_uart2 {
diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi
index 109c9d2b684d..71cf81a8eb4d 100644
--- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi
@@ -10,6 +10,7 @@
#include <dt-bindings/interconnect/qcom,sc8280xp.h>
#include <dt-bindings/interrupt-controller/arm-gic.h>
#include <dt-bindings/mailbox/qcom-ipcc.h>
+#include <dt-bindings/phy/phy-qcom-qmp.h>
#include <dt-bindings/power/qcom-rpmpd.h>
#include <dt-bindings/soc/qcom,rpmh-rsc.h>
#include <dt-bindings/thermal/thermal.h>
@@ -762,7 +763,7 @@
<0>,
<0>,
<0>,
- <&usb_0_ssphy>,
+ <&usb_0_qmpphy QMP_USB43DP_USB3_PIPE_CLK>,
<0>,
<0>,
<0>,
@@ -770,7 +771,7 @@
<0>,
<0>,
<0>,
- <&usb_1_ssphy>,
+ <&usb_1_qmpphy QMP_USB43DP_USB3_PIPE_CLK>,
<0>,
<0>,
<0>,
@@ -1673,42 +1674,26 @@
};
};
- usb_0_qmpphy: phy-wrapper@88ec000 {
+ usb_0_qmpphy: phy@88eb000 {
compatible = "qcom,sc8280xp-qmp-usb43dp-phy";
- reg = <0 0x088ec000 0 0x1e4>,
- <0 0x088eb000 0 0x40>,
- <0 0x088ed000 0 0x1c8>;
- #address-cells = <2>;
- #size-cells = <2>;
- ranges;
+ reg = <0 0x088eb000 0 0x4000>;
clocks = <&gcc GCC_USB3_PRIM_PHY_AUX_CLK>,
- <&rpmhcc RPMH_CXO_CLK>,
<&gcc GCC_USB4_EUD_CLKREF_CLK>,
- <&gcc GCC_USB3_PRIM_PHY_COM_AUX_CLK>;
- clock-names = "aux", "ref_clk_src", "ref", "com_aux";
+ <&gcc GCC_USB3_PRIM_PHY_COM_AUX_CLK>,
+ <&gcc GCC_USB3_PRIM_PHY_PIPE_CLK>;
+ clock-names = "aux", "ref", "com_aux", "usb3_pipe";
+
+ power-domains = <&gcc USB30_PRIM_GDSC>;
resets = <&gcc GCC_USB3_PHY_PRIM_BCR>,
- <&gcc GCC_USB3_DP_PHY_PRIM_BCR>;
+ <&gcc GCC_USB4_DP_PHY_PRIM_BCR>;
reset-names = "phy", "common";
- power-domains = <&gcc USB30_PRIM_GDSC>;
+ #clock-cells = <1>;
+ #phy-cells = <1>;
status = "disabled";
-
- usb_0_ssphy: usb3-phy@88eb400 {
- reg = <0 0x088eb400 0 0x100>,
- <0 0x088eb600 0 0x3ec>,
- <0 0x088ec400 0 0x364>,
- <0 0x088eba00 0 0x100>,
- <0 0x088ebc00 0 0x3ec>,
- <0 0x088ec200 0 0x18>;
- #phy-cells = <0>;
- #clock-cells = <0>;
- clocks = <&gcc GCC_USB3_PRIM_PHY_PIPE_CLK>;
- clock-names = "pipe0";
- clock-output-names = "usb0_phy_pipe_clk_src";
- };
};
usb_1_hsphy: phy@8902000 {
@@ -1725,42 +1710,26 @@
status = "disabled";
};
- usb_1_qmpphy: phy-wrapper@8904000 {
+ usb_1_qmpphy: phy@8903000 {
compatible = "qcom,sc8280xp-qmp-usb43dp-phy";
- reg = <0 0x08904000 0 0x1e4>,
- <0 0x08903000 0 0x40>,
- <0 0x08905000 0 0x1c8>;
- #address-cells = <2>;
- #size-cells = <2>;
- ranges;
+ reg = <0 0x08903000 0 0x4000>;
clocks = <&gcc GCC_USB3_SEC_PHY_AUX_CLK>,
- <&rpmhcc RPMH_CXO_CLK>,
<&gcc GCC_USB4_CLKREF_CLK>,
- <&gcc GCC_USB3_SEC_PHY_COM_AUX_CLK>;
- clock-names = "aux", "ref_clk_src", "ref", "com_aux";
+ <&gcc GCC_USB3_SEC_PHY_COM_AUX_CLK>,
+ <&gcc GCC_USB3_SEC_PHY_PIPE_CLK>;
+ clock-names = "aux", "ref", "com_aux", "usb3_pipe";
+
+ power-domains = <&gcc USB30_SEC_GDSC>;
resets = <&gcc GCC_USB3_PHY_SEC_BCR>,
<&gcc GCC_USB4_1_DP_PHY_PRIM_BCR>;
reset-names = "phy", "common";
- power-domains = <&gcc USB30_SEC_GDSC>;
+ #clock-cells = <1>;
+ #phy-cells = <1>;
status = "disabled";
-
- usb_1_ssphy: usb3-phy@8903400 {
- reg = <0 0x08903400 0 0x100>,
- <0 0x08903600 0 0x3ec>,
- <0 0x08904400 0 0x364>,
- <0 0x08903a00 0 0x100>,
- <0 0x08903c00 0 0x3ec>,
- <0 0x08904200 0 0x18>;
- #phy-cells = <0>;
- #clock-cells = <0>;
- clocks = <&gcc GCC_USB3_SEC_PHY_PIPE_CLK>;
- clock-names = "pipe0";
- clock-output-names = "usb1_phy_pipe_clk_src";
- };
};
pmu@9091000 {
@@ -1910,7 +1879,7 @@
reg = <0 0x0a600000 0 0xcd00>;
interrupts = <GIC_SPI 803 IRQ_TYPE_LEVEL_HIGH>;
iommus = <&apps_smmu 0x820 0x0>;
- phys = <&usb_0_hsphy>, <&usb_0_ssphy>;
+ phys = <&usb_0_hsphy>, <&usb_0_qmpphy QMP_USB43DP_USB3_PHY>;
phy-names = "usb2-phy", "usb3-phy";
};
};
@@ -1964,7 +1933,7 @@
reg = <0 0x0a800000 0 0xcd00>;
interrupts = <GIC_SPI 810 IRQ_TYPE_LEVEL_HIGH>;
iommus = <&apps_smmu 0x860 0x0>;
- phys = <&usb_1_hsphy>, <&usb_1_ssphy>;
+ phys = <&usb_1_hsphy>, <&usb_1_qmpphy QMP_USB43DP_USB3_PHY>;
phy-names = "usb2-phy", "usb3-phy";
};
};
diff --git a/arch/arm64/boot/dts/qcom/sm8250.dtsi b/arch/arm64/boot/dts/qcom/sm8250.dtsi
index dab5579946f3..927032863e2f 100644
--- a/arch/arm64/boot/dts/qcom/sm8250.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8250.dtsi
@@ -334,7 +334,6 @@
exit-latency-us = <6562>;
min-residency-us = <9987>;
local-timer-stop;
- status = "disabled";
};
};
};
diff --git a/arch/arm64/boot/dts/qcom/sm8350.dtsi b/arch/arm64/boot/dts/qcom/sm8350.dtsi
index 245dce24ec59..fb3cd20a82b5 100644
--- a/arch/arm64/boot/dts/qcom/sm8350.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8350.dtsi
@@ -2382,8 +2382,8 @@
<&rpmhcc RPMH_CXO_CLK>;
clock-names = "iface", "core", "xo";
resets = <&gcc GCC_SDCC2_BCR>;
- interconnects = <&aggre2_noc MASTER_SDCC_2 0 &mc_virt SLAVE_EBI1 0>,
- <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_SDCC_2 0>;
+ interconnects = <&aggre2_noc MASTER_SDCC_2 &mc_virt SLAVE_EBI1>,
+ <&gem_noc MASTER_APPSS_PROC &config_noc SLAVE_SDCC_2>;
interconnect-names = "sdhc-ddr","cpu-sdhc";
iommus = <&apps_smmu 0x4a0 0x0>;
power-domains = <&rpmhpd SM8350_CX>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts
index aa22a0c22265..5d5d9574088c 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts
@@ -96,7 +96,6 @@
linux,default-trigger = "heartbeat";
gpios = <&rk805 1 GPIO_ACTIVE_LOW>;
default-state = "on";
- mode = <0x23>;
};
user_led: led-1 {
@@ -104,7 +103,6 @@
linux,default-trigger = "mmc1";
gpios = <&rk805 0 GPIO_ACTIVE_LOW>;
default-state = "off";
- mode = <0x05>;
};
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-op1-opp.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-op1-opp.dtsi
index 6e29e74f6fc6..783120e9cebe 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-op1-opp.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-op1-opp.dtsi
@@ -111,7 +111,7 @@
};
};
- dmc_opp_table: dmc_opp_table {
+ dmc_opp_table: opp-table-3 {
compatible = "operating-points-v2";
opp00 {
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-pinephone-pro.dts b/arch/arm64/boot/dts/rockchip/rk3399-pinephone-pro.dts
index 04403a76238b..a0795a2b1cb1 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-pinephone-pro.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-pinephone-pro.dts
@@ -104,6 +104,13 @@
};
};
+&cpu_alert0 {
+ temperature = <65000>;
+};
+&cpu_alert1 {
+ temperature = <68000>;
+};
+
&cpu_l0 {
cpu-supply = <&vdd_cpu_l>;
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
index 4391aea25984..1881b4b71f91 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
@@ -589,7 +589,7 @@
clocks = <&cru HCLK_M_CRYPTO0>, <&cru HCLK_S_CRYPTO0>, <&cru SCLK_CRYPTO0>;
clock-names = "hclk_master", "hclk_slave", "sclk";
resets = <&cru SRST_CRYPTO0>, <&cru SRST_CRYPTO0_S>, <&cru SRST_CRYPTO0_M>;
- reset-names = "master", "lave", "crypto";
+ reset-names = "master", "slave", "crypto-rst";
};
crypto1: crypto@ff8b8000 {
@@ -599,7 +599,7 @@
clocks = <&cru HCLK_M_CRYPTO1>, <&cru HCLK_S_CRYPTO1>, <&cru SCLK_CRYPTO1>;
clock-names = "hclk_master", "hclk_slave", "sclk";
resets = <&cru SRST_CRYPTO1>, <&cru SRST_CRYPTO1_S>, <&cru SRST_CRYPTO1_M>;
- reset-names = "master", "slave", "crypto";
+ reset-names = "master", "slave", "crypto-rst";
};
i2c1: i2c@ff110000 {
@@ -2241,13 +2241,11 @@
pcfg_input_pull_up: pcfg-input-pull-up {
input-enable;
bias-pull-up;
- drive-strength = <2>;
};
pcfg_input_pull_down: pcfg-input-pull-down {
input-enable;
bias-pull-down;
- drive-strength = <2>;
};
clock {
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts b/arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts
index 4c7f9abd594f..d956496d5221 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts
@@ -353,6 +353,17 @@
};
};
+&pmu_io_domains {
+ pmuio2-supply = <&vcc_3v3>;
+ vccio1-supply = <&vcc_3v3>;
+ vccio3-supply = <&vcc_3v3>;
+ vccio4-supply = <&vcca_1v8>;
+ vccio5-supply = <&vcc_3v3>;
+ vccio6-supply = <&vcca_1v8>;
+ vccio7-supply = <&vcc_3v3>;
+ status = "okay";
+};
+
&pwm0 {
status = "okay";
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts b/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts
index a1c5fdf7d68f..3c9d85257cc9 100644
--- a/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts
@@ -571,6 +571,8 @@
};
&i2s1_8ch {
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2s1m0_sclktx &i2s1m0_lrcktx &i2s1m0_sdi0 &i2s1m0_sdo0>;
rockchip,trcm-sync-tx-only;
status = "okay";
};
@@ -730,14 +732,13 @@
disable-wp;
pinctrl-names = "default";
pinctrl-0 = <&sdmmc0_bus4 &sdmmc0_clk &sdmmc0_cmd &sdmmc0_det>;
- sd-uhs-sdr104;
+ sd-uhs-sdr50;
vmmc-supply = <&vcc3v3_sd>;
vqmmc-supply = <&vccio_sd>;
status = "okay";
};
&sdmmc2 {
- supports-sdio;
bus-width = <4>;
disable-wp;
cap-sd-highspeed;
diff --git a/arch/arm64/boot/dts/rockchip/rk356x.dtsi b/arch/arm64/boot/dts/rockchip/rk356x.dtsi
index 5706c3e24f0a..c27f1c7f072d 100644
--- a/arch/arm64/boot/dts/rockchip/rk356x.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk356x.dtsi
@@ -966,6 +966,7 @@
clock-names = "aclk_mst", "aclk_slv",
"aclk_dbi", "pclk", "aux";
device_type = "pci";
+ #interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 7>;
interrupt-map = <0 0 0 1 &pcie_intc 0>,
<0 0 0 2 &pcie_intc 1>,
diff --git a/arch/arm64/crypto/sm4-ce-ccm-core.S b/arch/arm64/crypto/sm4-ce-ccm-core.S
index 028207c4afd0..fa85856f33ce 100644
--- a/arch/arm64/crypto/sm4-ce-ccm-core.S
+++ b/arch/arm64/crypto/sm4-ce-ccm-core.S
@@ -8,6 +8,7 @@
*/
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/assembler.h>
#include "sm4-ce-asm.h"
@@ -104,7 +105,7 @@ SYM_FUNC_START(sm4_ce_ccm_final)
SYM_FUNC_END(sm4_ce_ccm_final)
.align 3
-SYM_FUNC_START(sm4_ce_ccm_enc)
+SYM_TYPED_FUNC_START(sm4_ce_ccm_enc)
/* input:
* x0: round key array, CTX
* x1: dst
@@ -216,7 +217,7 @@ SYM_FUNC_START(sm4_ce_ccm_enc)
SYM_FUNC_END(sm4_ce_ccm_enc)
.align 3
-SYM_FUNC_START(sm4_ce_ccm_dec)
+SYM_TYPED_FUNC_START(sm4_ce_ccm_dec)
/* input:
* x0: round key array, CTX
* x1: dst
diff --git a/arch/arm64/crypto/sm4-ce-gcm-core.S b/arch/arm64/crypto/sm4-ce-gcm-core.S
index 7aa3ec18a289..347f25d75727 100644
--- a/arch/arm64/crypto/sm4-ce-gcm-core.S
+++ b/arch/arm64/crypto/sm4-ce-gcm-core.S
@@ -9,6 +9,7 @@
*/
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/assembler.h>
#include "sm4-ce-asm.h"
@@ -370,7 +371,7 @@ SYM_FUNC_START(pmull_ghash_update)
SYM_FUNC_END(pmull_ghash_update)
.align 3
-SYM_FUNC_START(sm4_ce_pmull_gcm_enc)
+SYM_TYPED_FUNC_START(sm4_ce_pmull_gcm_enc)
/* input:
* x0: round key array, CTX
* x1: dst
@@ -581,7 +582,7 @@ SYM_FUNC_END(sm4_ce_pmull_gcm_enc)
#define RH3 v20
.align 3
-SYM_FUNC_START(sm4_ce_pmull_gcm_dec)
+SYM_TYPED_FUNC_START(sm4_ce_pmull_gcm_dec)
/* input:
* x0: round key array, CTX
* x1: dst
diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
index 0890e4f568fb..cbb3d961123b 100644
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -315,7 +315,7 @@ __ll_sc__cmpxchg_double##name(unsigned long old1, \
" cbnz %w0, 1b\n" \
" " #mb "\n" \
"2:" \
- : "=&r" (tmp), "=&r" (ret), "+Q" (*(unsigned long *)ptr) \
+ : "=&r" (tmp), "=&r" (ret), "+Q" (*(__uint128_t *)ptr) \
: "r" (old1), "r" (old2), "r" (new1), "r" (new2) \
: cl); \
\
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index 52075e93de6c..a94d6dacc029 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -311,7 +311,7 @@ __lse__cmpxchg_double##name(unsigned long old1, \
" eor %[old2], %[old2], %[oldval2]\n" \
" orr %[old1], %[old1], %[old2]" \
: [old1] "+&r" (x0), [old2] "+&r" (x1), \
- [v] "+Q" (*(unsigned long *)ptr) \
+ [v] "+Q" (*(__uint128_t *)ptr) \
: [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \
[oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \
: cl); \
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 4e8b66c74ea2..683ca3af4084 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -124,6 +124,8 @@
#define APPLE_CPU_PART_M1_FIRESTORM_PRO 0x025
#define APPLE_CPU_PART_M1_ICESTORM_MAX 0x028
#define APPLE_CPU_PART_M1_FIRESTORM_MAX 0x029
+#define APPLE_CPU_PART_M2_BLIZZARD 0x032
+#define APPLE_CPU_PART_M2_AVALANCHE 0x033
#define AMPERE_CPU_PART_AMPERE1 0xAC3
@@ -177,6 +179,8 @@
#define MIDR_APPLE_M1_FIRESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_PRO)
#define MIDR_APPLE_M1_ICESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_MAX)
#define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX)
+#define MIDR_APPLE_M2_BLIZZARD MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD)
+#define MIDR_APPLE_M2_AVALANCHE MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE)
#define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1)
/* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index 31d13a6001df..de4ff90785b2 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -48,8 +48,17 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
})
extern spinlock_t efi_rt_lock;
+extern u64 *efi_rt_stack_top;
efi_status_t __efi_rt_asm_wrapper(void *, const char *, ...);
+/*
+ * efi_rt_stack_top[-1] contains the value the stack pointer had before
+ * switching to the EFI runtime stack.
+ */
+#define current_in_efi() \
+ (!preemptible() && efi_rt_stack_top != NULL && \
+ on_task_stack(current, READ_ONCE(efi_rt_stack_top[-1]), 1))
+
#define ARCH_EFI_IRQ_FLAGS_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
/*
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 15b34fbfca66..206de10524e3 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -114,6 +114,15 @@
#define ESR_ELx_FSC_ACCESS (0x08)
#define ESR_ELx_FSC_FAULT (0x04)
#define ESR_ELx_FSC_PERM (0x0C)
+#define ESR_ELx_FSC_SEA_TTW0 (0x14)
+#define ESR_ELx_FSC_SEA_TTW1 (0x15)
+#define ESR_ELx_FSC_SEA_TTW2 (0x16)
+#define ESR_ELx_FSC_SEA_TTW3 (0x17)
+#define ESR_ELx_FSC_SECC (0x18)
+#define ESR_ELx_FSC_SECC_TTW0 (0x1c)
+#define ESR_ELx_FSC_SECC_TTW1 (0x1d)
+#define ESR_ELx_FSC_SECC_TTW2 (0x1e)
+#define ESR_ELx_FSC_SECC_TTW3 (0x1f)
/* ISS field definitions for Data Aborts */
#define ESR_ELx_ISV_SHIFT (24)
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index d20f5da2d76f..6a4a1ab8eb23 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -49,6 +49,15 @@ extern pte_t huge_ptep_get(pte_t *ptep);
void __init arm64_hugetlb_cma_reserve(void);
+#define huge_ptep_modify_prot_start huge_ptep_modify_prot_start
+extern pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep);
+
+#define huge_ptep_modify_prot_commit huge_ptep_modify_prot_commit
+extern void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t old_pte, pte_t new_pte);
+
#include <asm-generic/hugetlb.h>
#endif /* __ASM_HUGETLB_H */
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 0df3fc3a0173..26b0c97df986 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -319,21 +319,6 @@
BIT(18) | \
GENMASK(16, 15))
-/* For compatibility with fault code shared with 32-bit */
-#define FSC_FAULT ESR_ELx_FSC_FAULT
-#define FSC_ACCESS ESR_ELx_FSC_ACCESS
-#define FSC_PERM ESR_ELx_FSC_PERM
-#define FSC_SEA ESR_ELx_FSC_EXTABT
-#define FSC_SEA_TTW0 (0x14)
-#define FSC_SEA_TTW1 (0x15)
-#define FSC_SEA_TTW2 (0x16)
-#define FSC_SEA_TTW3 (0x17)
-#define FSC_SECC (0x18)
-#define FSC_SECC_TTW0 (0x1c)
-#define FSC_SECC_TTW1 (0x1d)
-#define FSC_SECC_TTW2 (0x1e)
-#define FSC_SECC_TTW3 (0x1f)
-
/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
#define HPFAR_MASK (~UL(0xf))
/*
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 9bdba47f7e14..193583df2d9c 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -349,16 +349,16 @@ static __always_inline u8 kvm_vcpu_trap_get_fault_level(const struct kvm_vcpu *v
static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu)
{
switch (kvm_vcpu_trap_get_fault(vcpu)) {
- case FSC_SEA:
- case FSC_SEA_TTW0:
- case FSC_SEA_TTW1:
- case FSC_SEA_TTW2:
- case FSC_SEA_TTW3:
- case FSC_SECC:
- case FSC_SECC_TTW0:
- case FSC_SECC_TTW1:
- case FSC_SECC_TTW2:
- case FSC_SECC_TTW3:
+ case ESR_ELx_FSC_EXTABT:
+ case ESR_ELx_FSC_SEA_TTW0:
+ case ESR_ELx_FSC_SEA_TTW1:
+ case ESR_ELx_FSC_SEA_TTW2:
+ case ESR_ELx_FSC_SEA_TTW3:
+ case ESR_ELx_FSC_SECC:
+ case ESR_ELx_FSC_SECC_TTW0:
+ case ESR_ELx_FSC_SECC_TTW1:
+ case ESR_ELx_FSC_SECC_TTW2:
+ case ESR_ELx_FSC_SECC_TTW3:
return true;
default:
return false;
@@ -373,8 +373,26 @@ static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
{
- if (kvm_vcpu_abt_iss1tw(vcpu))
- return true;
+ if (kvm_vcpu_abt_iss1tw(vcpu)) {
+ /*
+ * Only a permission fault on a S1PTW should be
+ * considered as a write. Otherwise, page tables baked
+ * in a read-only memslot will result in an exception
+ * being delivered in the guest.
+ *
+ * The drawback is that we end-up faulting twice if the
+ * guest is using any of HW AF/DB: a translation fault
+ * to map the page containing the PT (read only at
+ * first), then a permission fault to allow the flags
+ * to be set.
+ */
+ switch (kvm_vcpu_trap_get_fault_type(vcpu)) {
+ case ESR_ELx_FSC_PERM:
+ return true;
+ default:
+ return false;
+ }
+ }
if (kvm_vcpu_trap_is_iabt(vcpu))
return false;
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index b4bbeed80fb6..65e78999c75d 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -681,7 +681,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
#define pud_leaf(pud) (pud_present(pud) && !pud_table(pud))
#define pud_valid(pud) pte_valid(pud_pte(pud))
#define pud_user(pud) pte_user(pud_pte(pud))
-
+#define pud_user_exec(pud) pte_user_exec(pud_pte(pud))
static inline void set_pud(pud_t *pudp, pud_t pud)
{
@@ -730,6 +730,7 @@ static inline pmd_t *pud_pgtable(pud_t pud)
#else
#define pud_page_paddr(pud) ({ BUILD_BUG(); 0; })
+#define pud_user_exec(pud) pud_user(pud) /* Always 0 with folding */
/* Match pmd_offset folding in <asm/generic/pgtable-nopmd.h> */
#define pmd_set_fixmap(addr) NULL
@@ -862,12 +863,12 @@ static inline bool pte_user_accessible_page(pte_t pte)
static inline bool pmd_user_accessible_page(pmd_t pmd)
{
- return pmd_leaf(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd));
+ return pmd_leaf(pmd) && !pmd_present_invalid(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd));
}
static inline bool pud_user_accessible_page(pud_t pud)
{
- return pud_leaf(pud) && pud_user(pud);
+ return pud_leaf(pud) && (pud_user(pud) || pud_user_exec(pud));
}
#endif
@@ -1093,6 +1094,15 @@ static inline bool pud_sect_supported(void)
}
+#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
+#define ptep_modify_prot_start ptep_modify_prot_start
+extern pte_t ptep_modify_prot_start(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep);
+
+#define ptep_modify_prot_commit ptep_modify_prot_commit
+extern void ptep_modify_prot_commit(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t old_pte, pte_t new_pte);
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_PGTABLE_H */
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index 4e5354beafb0..66ec8caa6ac0 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -106,4 +106,19 @@ static inline struct stack_info stackinfo_get_sdei_critical(void)
#define stackinfo_get_sdei_critical() stackinfo_get_unknown()
#endif
+#ifdef CONFIG_EFI
+extern u64 *efi_rt_stack_top;
+
+static inline struct stack_info stackinfo_get_efi(void)
+{
+ unsigned long high = (u64)efi_rt_stack_top;
+ unsigned long low = high - THREAD_SIZE;
+
+ return (struct stack_info) {
+ .low = low,
+ .high = high,
+ };
+}
+#endif
+
#endif /* __ASM_STACKTRACE_H */
diff --git a/arch/arm64/include/asm/uprobes.h b/arch/arm64/include/asm/uprobes.h
index ba4bff5ca674..2b09495499c6 100644
--- a/arch/arm64/include/asm/uprobes.h
+++ b/arch/arm64/include/asm/uprobes.h
@@ -16,7 +16,7 @@
#define UPROBE_SWBP_INSN_SIZE AARCH64_INSN_SIZE
#define UPROBE_XOL_SLOT_BYTES MAX_UINSN_BYTES
-typedef u32 uprobe_opcode_t;
+typedef __le32 uprobe_opcode_t;
struct arch_uprobe_task {
};
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 89ac00084f38..307faa2b4395 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -661,6 +661,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
CAP_MIDR_RANGE_LIST(trbe_write_out_of_range_cpus),
},
#endif
+#ifdef CONFIG_ARM64_ERRATUM_2645198
+ {
+ .desc = "ARM erratum 2645198",
+ .capability = ARM64_WORKAROUND_2645198,
+ ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A715)
+ },
+#endif
#ifdef CONFIG_ARM64_ERRATUM_2077057
{
.desc = "ARM erratum 2077057",
diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S
index a00886410537..e8ae803662cf 100644
--- a/arch/arm64/kernel/efi-rt-wrapper.S
+++ b/arch/arm64/kernel/efi-rt-wrapper.S
@@ -4,6 +4,7 @@
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
SYM_FUNC_START(__efi_rt_asm_wrapper)
stp x29, x30, [sp, #-112]!
@@ -45,7 +46,10 @@ SYM_FUNC_START(__efi_rt_asm_wrapper)
mov x4, x6
blr x8
+ mov x16, sp
mov sp, x29
+ str xzr, [x16, #8] // clear recorded task SP value
+
ldp x1, x2, [sp, #16]
cmp x2, x18
ldp x29, x30, [sp], #112
@@ -70,6 +74,9 @@ SYM_FUNC_END(__efi_rt_asm_wrapper)
SYM_CODE_START(__efi_rt_asm_recover)
mov sp, x30
+ ldr_l x16, efi_rt_stack_top // clear recorded task SP value
+ str xzr, [x16, #-8]
+
ldp x19, x20, [sp, #32]
ldp x21, x22, [sp, #48]
ldp x23, x24, [sp, #64]
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index fab05de2e12d..b273900f4566 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -11,6 +11,7 @@
#include <linux/init.h>
#include <asm/efi.h>
+#include <asm/stacktrace.h>
static bool region_is_misaligned(const efi_memory_desc_t *md)
{
@@ -154,7 +155,7 @@ asmlinkage efi_status_t __efi_rt_asm_recover(void);
bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg)
{
/* Check whether the exception occurred while running the firmware */
- if (current_work() != &efi_rts_work.work || regs->pc >= TASK_SIZE_64)
+ if (!current_in_efi() || regs->pc >= TASK_SIZE_64)
return false;
pr_err(FW_BUG "Unable to handle %s in EFI runtime service\n", msg);
diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c
index 353009d7f307..2e94d20c4ac7 100644
--- a/arch/arm64/kernel/elfcore.c
+++ b/arch/arm64/kernel/elfcore.c
@@ -8,28 +8,27 @@
#include <asm/cpufeature.h>
#include <asm/mte.h>
-#define for_each_mte_vma(vmi, vma) \
+#define for_each_mte_vma(cprm, i, m) \
if (system_supports_mte()) \
- for_each_vma(vmi, vma) \
- if (vma->vm_flags & VM_MTE)
+ for (i = 0, m = cprm->vma_meta; \
+ i < cprm->vma_count; \
+ i++, m = cprm->vma_meta + i) \
+ if (m->flags & VM_MTE)
-static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma)
+static unsigned long mte_vma_tag_dump_size(struct core_vma_metadata *m)
{
- if (vma->vm_flags & VM_DONTDUMP)
- return 0;
-
- return vma_pages(vma) * MTE_PAGE_TAG_STORAGE;
+ return (m->dump_size >> PAGE_SHIFT) * MTE_PAGE_TAG_STORAGE;
}
/* Derived from dump_user_range(); start/end must be page-aligned */
static int mte_dump_tag_range(struct coredump_params *cprm,
- unsigned long start, unsigned long end)
+ unsigned long start, unsigned long len)
{
int ret = 1;
unsigned long addr;
void *tags = NULL;
- for (addr = start; addr < end; addr += PAGE_SIZE) {
+ for (addr = start; addr < start + len; addr += PAGE_SIZE) {
struct page *page = get_dump_page(addr);
/*
@@ -65,7 +64,6 @@ static int mte_dump_tag_range(struct coredump_params *cprm,
mte_save_page_tags(page_address(page), tags);
put_page(page);
if (!dump_emit(cprm, tags, MTE_PAGE_TAG_STORAGE)) {
- mte_free_tag_storage(tags);
ret = 0;
break;
}
@@ -77,13 +75,13 @@ static int mte_dump_tag_range(struct coredump_params *cprm,
return ret;
}
-Elf_Half elf_core_extra_phdrs(void)
+Elf_Half elf_core_extra_phdrs(struct coredump_params *cprm)
{
- struct vm_area_struct *vma;
+ int i;
+ struct core_vma_metadata *m;
int vma_count = 0;
- VMA_ITERATOR(vmi, current->mm, 0);
- for_each_mte_vma(vmi, vma)
+ for_each_mte_vma(cprm, i, m)
vma_count++;
return vma_count;
@@ -91,18 +89,18 @@ Elf_Half elf_core_extra_phdrs(void)
int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
{
- struct vm_area_struct *vma;
- VMA_ITERATOR(vmi, current->mm, 0);
+ int i;
+ struct core_vma_metadata *m;
- for_each_mte_vma(vmi, vma) {
+ for_each_mte_vma(cprm, i, m) {
struct elf_phdr phdr;
phdr.p_type = PT_AARCH64_MEMTAG_MTE;
phdr.p_offset = offset;
- phdr.p_vaddr = vma->vm_start;
+ phdr.p_vaddr = m->start;
phdr.p_paddr = 0;
- phdr.p_filesz = mte_vma_tag_dump_size(vma);
- phdr.p_memsz = vma->vm_end - vma->vm_start;
+ phdr.p_filesz = mte_vma_tag_dump_size(m);
+ phdr.p_memsz = m->end - m->start;
offset += phdr.p_filesz;
phdr.p_flags = 0;
phdr.p_align = 0;
@@ -114,28 +112,25 @@ int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
return 1;
}
-size_t elf_core_extra_data_size(void)
+size_t elf_core_extra_data_size(struct coredump_params *cprm)
{
- struct vm_area_struct *vma;
+ int i;
+ struct core_vma_metadata *m;
size_t data_size = 0;
- VMA_ITERATOR(vmi, current->mm, 0);
- for_each_mte_vma(vmi, vma)
- data_size += mte_vma_tag_dump_size(vma);
+ for_each_mte_vma(cprm, i, m)
+ data_size += mte_vma_tag_dump_size(m);
return data_size;
}
int elf_core_write_extra_data(struct coredump_params *cprm)
{
- struct vm_area_struct *vma;
- VMA_ITERATOR(vmi, current->mm, 0);
-
- for_each_mte_vma(vmi, vma) {
- if (vma->vm_flags & VM_DONTDUMP)
- continue;
+ int i;
+ struct core_vma_metadata *m;
- if (!mte_dump_tag_range(cprm, vma->vm_start, vma->vm_end))
+ for_each_mte_vma(cprm, i, m) {
+ if (!mte_dump_tag_range(cprm, m->start, m->dump_size))
return 0;
}
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index dcc81e7200d4..b6ef1af0122e 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -385,7 +385,7 @@ static void task_fpsimd_load(void)
WARN_ON(!system_supports_fpsimd());
WARN_ON(!have_cpu_fpsimd_context());
- if (system_supports_sve()) {
+ if (system_supports_sve() || system_supports_sme()) {
switch (current->thread.fp_type) {
case FP_STATE_FPSIMD:
/* Stop tracking SVE for this task until next use. */
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index a5193f2146a6..dde06c0f97f3 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -1023,12 +1023,6 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
return 0;
}
-static bool armv8pmu_filter(struct pmu *pmu, int cpu)
-{
- struct arm_pmu *armpmu = to_arm_pmu(pmu);
- return !cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus);
-}
-
static void armv8pmu_reset(void *info)
{
struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
@@ -1069,6 +1063,14 @@ static int __armv8_pmuv3_map_event(struct perf_event *event,
&armv8_pmuv3_perf_cache_map,
ARMV8_PMU_EVTYPE_EVENT);
+ /*
+ * CHAIN events only work when paired with an adjacent counter, and it
+ * never makes sense for a user to open one in isolation, as they'll be
+ * rotated arbitrarily.
+ */
+ if (hw_event_id == ARMV8_PMUV3_PERFCTR_CHAIN)
+ return -EINVAL;
+
if (armv8pmu_event_is_64bit(event))
event->hw.flags |= ARMPMU_EVT_64BIT;
@@ -1258,7 +1260,6 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name,
cpu_pmu->stop = armv8pmu_stop;
cpu_pmu->reset = armv8pmu_reset;
cpu_pmu->set_event_filter = armv8pmu_set_event_filter;
- cpu_pmu->filter = armv8pmu_filter;
cpu_pmu->pmu.event_idx = armv8pmu_user_event_idx;
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 2686ab157601..0c321ad23cd3 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -1357,7 +1357,7 @@ enum aarch64_regset {
#ifdef CONFIG_ARM64_SVE
REGSET_SVE,
#endif
-#ifdef CONFIG_ARM64_SVE
+#ifdef CONFIG_ARM64_SME
REGSET_SSVE,
REGSET_ZA,
#endif
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index e0d09bf5b01b..be279fd48248 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -281,7 +281,12 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
vl = task_get_sme_vl(current);
} else {
- if (!system_supports_sve())
+ /*
+ * A SME only system use SVE for streaming mode so can
+ * have a SVE formatted context with a zero VL and no
+ * payload data.
+ */
+ if (!system_supports_sve() && !system_supports_sme())
return -EINVAL;
vl = task_get_sve_vl(current);
@@ -732,7 +737,7 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
return err;
}
- if (system_supports_sve()) {
+ if (system_supports_sve() || system_supports_sme()) {
unsigned int vq = 0;
if (add_all || test_thread_flag(TIF_SVE) ||
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 117e2c180f3c..83154303e682 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -5,6 +5,7 @@
* Copyright (C) 2012 ARM Ltd.
*/
#include <linux/kernel.h>
+#include <linux/efi.h>
#include <linux/export.h>
#include <linux/ftrace.h>
#include <linux/sched.h>
@@ -12,6 +13,7 @@
#include <linux/sched/task_stack.h>
#include <linux/stacktrace.h>
+#include <asm/efi.h>
#include <asm/irq.h>
#include <asm/stack_pointer.h>
#include <asm/stacktrace.h>
@@ -186,6 +188,13 @@ void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl)
: stackinfo_get_unknown(); \
})
+#define STACKINFO_EFI \
+ ({ \
+ ((task == current) && current_in_efi()) \
+ ? stackinfo_get_efi() \
+ : stackinfo_get_unknown(); \
+ })
+
noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry,
void *cookie, struct task_struct *task,
struct pt_regs *regs)
@@ -200,6 +209,9 @@ noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry,
STACKINFO_SDEI(normal),
STACKINFO_SDEI(critical),
#endif
+#ifdef CONFIG_EFI
+ STACKINFO_EFI,
+#endif
};
struct unwind_state state = {
.stacks = stacks,
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 5626ddb540ce..cf4c495a4321 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -1079,7 +1079,7 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
/* uaccess failed, don't leave stale tags */
if (num_tags != MTE_GRANULES_PER_PAGE)
- mte_clear_page_tags(page);
+ mte_clear_page_tags(maddr);
set_page_mte_tagged(page);
kvm_release_pfn_dirty(pfn);
diff --git a/arch/arm64/kvm/hyp/include/hyp/fault.h b/arch/arm64/kvm/hyp/include/hyp/fault.h
index 1b8a2dcd712f..9ddcfe2c3e57 100644
--- a/arch/arm64/kvm/hyp/include/hyp/fault.h
+++ b/arch/arm64/kvm/hyp/include/hyp/fault.h
@@ -60,7 +60,7 @@ static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
*/
if (!(esr & ESR_ELx_S1PTW) &&
(cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
- (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
+ (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM)) {
if (!__translate_far_to_hpfar(far, &hpfar))
return false;
} else {
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 3330d1b76bdd..07d37ff88a3f 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -367,7 +367,7 @@ static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
bool valid;
- valid = kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT &&
+ valid = kvm_vcpu_trap_get_fault_type(vcpu) == ESR_ELx_FSC_FAULT &&
kvm_vcpu_dabt_isvalid(vcpu) &&
!kvm_vcpu_abt_issea(vcpu) &&
!kvm_vcpu_abt_iss1tw(vcpu);
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 31d7fa4c7c14..a3ee3b605c9b 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1212,7 +1212,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
VM_BUG_ON(write_fault && exec_fault);
- if (fault_status == FSC_PERM && !write_fault && !exec_fault) {
+ if (fault_status == ESR_ELx_FSC_PERM && !write_fault && !exec_fault) {
kvm_err("Unexpected L2 read permission error\n");
return -EFAULT;
}
@@ -1277,7 +1277,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* only exception to this is when dirty logging is enabled at runtime
* and a write fault needs to collapse a block entry into a table.
*/
- if (fault_status != FSC_PERM || (logging_active && write_fault)) {
+ if (fault_status != ESR_ELx_FSC_PERM ||
+ (logging_active && write_fault)) {
ret = kvm_mmu_topup_memory_cache(memcache,
kvm_mmu_cache_min_pages(kvm));
if (ret)
@@ -1342,7 +1343,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* backed by a THP and thus use block mapping if possible.
*/
if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) {
- if (fault_status == FSC_PERM && fault_granule > PAGE_SIZE)
+ if (fault_status == ESR_ELx_FSC_PERM &&
+ fault_granule > PAGE_SIZE)
vma_pagesize = fault_granule;
else
vma_pagesize = transparent_hugepage_adjust(kvm, memslot,
@@ -1350,7 +1352,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
&fault_ipa);
}
- if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
+ if (fault_status != ESR_ELx_FSC_PERM && !device && kvm_has_mte(kvm)) {
/* Check the VMM hasn't introduced a new disallowed VMA */
if (kvm_vma_mte_allowed(vma)) {
sanitise_mte_tags(kvm, pfn, vma_pagesize);
@@ -1376,7 +1378,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* permissions only if vma_pagesize equals fault_granule. Otherwise,
* kvm_pgtable_stage2_map() should be called to change block size.
*/
- if (fault_status == FSC_PERM && vma_pagesize == fault_granule)
+ if (fault_status == ESR_ELx_FSC_PERM && vma_pagesize == fault_granule)
ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot);
else
ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
@@ -1441,7 +1443,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
- if (fault_status == FSC_FAULT) {
+ if (fault_status == ESR_ELx_FSC_FAULT) {
/* Beyond sanitised PARange (which is the IPA limit) */
if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) {
kvm_inject_size_fault(vcpu);
@@ -1476,8 +1478,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
kvm_vcpu_get_hfar(vcpu), fault_ipa);
/* Check the stage-2 fault is trans. fault or write fault */
- if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
- fault_status != FSC_ACCESS) {
+ if (fault_status != ESR_ELx_FSC_FAULT &&
+ fault_status != ESR_ELx_FSC_PERM &&
+ fault_status != ESR_ELx_FSC_ACCESS) {
kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
kvm_vcpu_trap_get_class(vcpu),
(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
@@ -1539,7 +1542,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
/* Userspace should not be able to register out-of-bounds IPAs */
VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->kvm));
- if (fault_status == FSC_ACCESS) {
+ if (fault_status == ESR_ELx_FSC_ACCESS) {
handle_access_fault(vcpu, fault_ipa);
ret = 1;
goto out_unlock;
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index d5ee52d6bf73..c6cbfe6b854b 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -646,7 +646,7 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
return;
/* Only preserve PMCR_EL0.N, and reset the rest to 0 */
- pmcr = read_sysreg(pmcr_el0) & ARMV8_PMU_PMCR_N_MASK;
+ pmcr = read_sysreg(pmcr_el0) & (ARMV8_PMU_PMCR_N_MASK << ARMV8_PMU_PMCR_N_SHIFT);
if (!kvm_supports_32bit_el0())
pmcr |= ARMV8_PMU_PMCR_LC;
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 94a666dd1443..2642e9ce2819 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -2187,7 +2187,7 @@ static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev,
((u64)ite->irq->intid << KVM_ITS_ITE_PINTID_SHIFT) |
ite->collection->collection_id;
val = cpu_to_le64(val);
- return kvm_write_guest_lock(kvm, gpa, &val, ite_esz);
+ return vgic_write_guest_lock(kvm, gpa, &val, ite_esz);
}
/**
@@ -2339,7 +2339,7 @@ static int vgic_its_save_dte(struct vgic_its *its, struct its_device *dev,
(itt_addr_field << KVM_ITS_DTE_ITTADDR_SHIFT) |
(dev->num_eventid_bits - 1));
val = cpu_to_le64(val);
- return kvm_write_guest_lock(kvm, ptr, &val, dte_esz);
+ return vgic_write_guest_lock(kvm, ptr, &val, dte_esz);
}
/**
@@ -2526,7 +2526,7 @@ static int vgic_its_save_cte(struct vgic_its *its,
((u64)collection->target_addr << KVM_ITS_CTE_RDBASE_SHIFT) |
collection->collection_id);
val = cpu_to_le64(val);
- return kvm_write_guest_lock(its->dev->kvm, gpa, &val, esz);
+ return vgic_write_guest_lock(its->dev->kvm, gpa, &val, esz);
}
/*
@@ -2607,7 +2607,7 @@ static int vgic_its_save_collection_table(struct vgic_its *its)
*/
val = 0;
BUG_ON(cte_esz > sizeof(val));
- ret = kvm_write_guest_lock(its->dev->kvm, gpa, &val, cte_esz);
+ ret = vgic_write_guest_lock(its->dev->kvm, gpa, &val, cte_esz);
return ret;
}
@@ -2743,7 +2743,6 @@ static int vgic_its_has_attr(struct kvm_device *dev,
static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr)
{
const struct vgic_its_abi *abi = vgic_its_get_abi(its);
- struct vgic_dist *dist = &kvm->arch.vgic;
int ret = 0;
if (attr == KVM_DEV_ARM_VGIC_CTRL_INIT) /* Nothing to do */
@@ -2763,9 +2762,7 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr)
vgic_its_reset(kvm, its);
break;
case KVM_DEV_ARM_ITS_SAVE_TABLES:
- dist->save_its_tables_in_progress = true;
ret = abi->save_tables(its);
- dist->save_its_tables_in_progress = false;
break;
case KVM_DEV_ARM_ITS_RESTORE_TABLES:
ret = abi->restore_tables(its);
@@ -2792,7 +2789,7 @@ bool kvm_arch_allow_write_without_running_vcpu(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
- return dist->save_its_tables_in_progress;
+ return dist->table_write_in_progress;
}
static int vgic_its_set_attr(struct kvm_device *dev,
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index 826ff6f2a4e7..684bdfaad4a9 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -339,7 +339,7 @@ retry:
if (status) {
/* clear consumed data */
val &= ~(1 << bit_nr);
- ret = kvm_write_guest_lock(kvm, ptr, &val, 1);
+ ret = vgic_write_guest_lock(kvm, ptr, &val, 1);
if (ret)
return ret;
}
@@ -350,26 +350,23 @@ retry:
* The deactivation of the doorbell interrupt will trigger the
* unmapping of the associated vPE.
*/
-static void unmap_all_vpes(struct vgic_dist *dist)
+static void unmap_all_vpes(struct kvm *kvm)
{
- struct irq_desc *desc;
+ struct vgic_dist *dist = &kvm->arch.vgic;
int i;
- for (i = 0; i < dist->its_vm.nr_vpes; i++) {
- desc = irq_to_desc(dist->its_vm.vpes[i]->irq);
- irq_domain_deactivate_irq(irq_desc_get_irq_data(desc));
- }
+ for (i = 0; i < dist->its_vm.nr_vpes; i++)
+ free_irq(dist->its_vm.vpes[i]->irq, kvm_get_vcpu(kvm, i));
}
-static void map_all_vpes(struct vgic_dist *dist)
+static void map_all_vpes(struct kvm *kvm)
{
- struct irq_desc *desc;
+ struct vgic_dist *dist = &kvm->arch.vgic;
int i;
- for (i = 0; i < dist->its_vm.nr_vpes; i++) {
- desc = irq_to_desc(dist->its_vm.vpes[i]->irq);
- irq_domain_activate_irq(irq_desc_get_irq_data(desc), false);
- }
+ for (i = 0; i < dist->its_vm.nr_vpes; i++)
+ WARN_ON(vgic_v4_request_vpe_irq(kvm_get_vcpu(kvm, i),
+ dist->its_vm.vpes[i]->irq));
}
/**
@@ -394,7 +391,7 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
* and enabling of the doorbells have already been done.
*/
if (kvm_vgic_global_state.has_gicv4_1) {
- unmap_all_vpes(dist);
+ unmap_all_vpes(kvm);
vlpi_avail = true;
}
@@ -437,14 +434,14 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
else
val &= ~(1 << bit_nr);
- ret = kvm_write_guest_lock(kvm, ptr, &val, 1);
+ ret = vgic_write_guest_lock(kvm, ptr, &val, 1);
if (ret)
goto out;
}
out:
if (vlpi_avail)
- map_all_vpes(dist);
+ map_all_vpes(kvm);
return ret;
}
@@ -616,6 +613,8 @@ static const struct midr_range broken_seis[] = {
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_PRO),
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_MAX),
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE),
{},
};
diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
index ad06ba6c9b00..a413718be92b 100644
--- a/arch/arm64/kvm/vgic/vgic-v4.c
+++ b/arch/arm64/kvm/vgic/vgic-v4.c
@@ -222,6 +222,11 @@ void vgic_v4_get_vlpi_state(struct vgic_irq *irq, bool *val)
*val = !!(*ptr & mask);
}
+int vgic_v4_request_vpe_irq(struct kvm_vcpu *vcpu, int irq)
+{
+ return request_irq(irq, vgic_v4_doorbell_handler, 0, "vcpu", vcpu);
+}
+
/**
* vgic_v4_init - Initialize the GICv4 data structures
* @kvm: Pointer to the VM being initialized
@@ -283,8 +288,7 @@ int vgic_v4_init(struct kvm *kvm)
irq_flags &= ~IRQ_NOAUTOEN;
irq_set_status_flags(irq, irq_flags);
- ret = request_irq(irq, vgic_v4_doorbell_handler,
- 0, "vcpu", vcpu);
+ ret = vgic_v4_request_vpe_irq(vcpu, irq);
if (ret) {
kvm_err("failed to allocate vcpu IRQ%d\n", irq);
/*
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index 0c8da72953f0..7f7f3c5ed85a 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -6,6 +6,7 @@
#define __KVM_ARM_VGIC_NEW_H__
#include <linux/irqchip/arm-gic-common.h>
+#include <asm/kvm_mmu.h>
#define PRODUCT_ID_KVM 0x4b /* ASCII code K */
#define IMPLEMENTER_ARM 0x43b
@@ -131,6 +132,19 @@ static inline bool vgic_irq_is_multi_sgi(struct vgic_irq *irq)
return vgic_irq_get_lr_count(irq) > 1;
}
+static inline int vgic_write_guest_lock(struct kvm *kvm, gpa_t gpa,
+ const void *data, unsigned long len)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ int ret;
+
+ dist->table_write_in_progress = true;
+ ret = kvm_write_guest_lock(kvm, gpa, data, len);
+ dist->table_write_in_progress = false;
+
+ return ret;
+}
+
/*
* This struct provides an intermediate representation of the fields contained
* in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC
@@ -331,5 +345,6 @@ int vgic_v4_init(struct kvm *kvm);
void vgic_v4_teardown(struct kvm *kvm);
void vgic_v4_configure_vsgis(struct kvm *kvm);
void vgic_v4_get_vlpi_state(struct vgic_irq *irq, bool *val);
+int vgic_v4_request_vpe_irq(struct kvm_vcpu *vcpu, int irq);
#endif
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 35e9a468d13e..95364e8bdc19 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -559,3 +559,24 @@ bool __init arch_hugetlb_valid_size(unsigned long size)
{
return __hugetlb_valid_size(size);
}
+
+pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
+{
+ if (IS_ENABLED(CONFIG_ARM64_ERRATUM_2645198) &&
+ cpus_have_const_cap(ARM64_WORKAROUND_2645198)) {
+ /*
+ * Break-before-make (BBM) is required for all user space mappings
+ * when the permission changes from executable to non-executable
+ * in cases where cpu is affected with errata #2645198.
+ */
+ if (pte_user_exec(READ_ONCE(*ptep)))
+ return huge_ptep_clear_flush(vma, addr, ptep);
+ }
+ return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
+}
+
+void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
+ pte_t old_pte, pte_t pte)
+{
+ set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
+}
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 14c87e8d69d8..d77c9f56b7b4 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1630,3 +1630,24 @@ static int __init prevent_bootmem_remove_init(void)
}
early_initcall(prevent_bootmem_remove_init);
#endif
+
+pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
+{
+ if (IS_ENABLED(CONFIG_ARM64_ERRATUM_2645198) &&
+ cpus_have_const_cap(ARM64_WORKAROUND_2645198)) {
+ /*
+ * Break-before-make (BBM) is required for all user space mappings
+ * when the permission changes from executable to non-executable
+ * in cases where cpu is affected with errata #2645198.
+ */
+ if (pte_user_exec(READ_ONCE(*ptep)))
+ return ptep_clear_flush(vma, addr, ptep);
+ }
+ return ptep_get_and_clear(vma->vm_mm, addr, ptep);
+}
+
+void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
+ pte_t old_pte, pte_t pte)
+{
+ set_pte_at(vma->vm_mm, addr, ptep, pte);
+}
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index a86ee376920a..dfeb2c51e257 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -71,6 +71,7 @@ WORKAROUND_2038923
WORKAROUND_2064142
WORKAROUND_2077057
WORKAROUND_2457168
+WORKAROUND_2645198
WORKAROUND_2658417
WORKAROUND_TRBE_OVERWRITE_FILL_MODE
WORKAROUND_TSB_FLUSH_FAILURE
diff --git a/arch/ia64/kernel/elfcore.c b/arch/ia64/kernel/elfcore.c
index 94680521fbf9..8895df121540 100644
--- a/arch/ia64/kernel/elfcore.c
+++ b/arch/ia64/kernel/elfcore.c
@@ -7,7 +7,7 @@
#include <asm/elf.h>
-Elf64_Half elf_core_extra_phdrs(void)
+Elf64_Half elf_core_extra_phdrs(struct coredump_params *cprm)
{
return GATE_EHDR->e_phnum;
}
@@ -60,7 +60,7 @@ int elf_core_write_extra_data(struct coredump_params *cprm)
return 1;
}
-size_t elf_core_extra_data_size(void)
+size_t elf_core_extra_data_size(struct coredump_params *cprm)
{
const struct elf_phdr *const gate_phdrs =
(const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
index f6a502e8f02c..6e948d015332 100644
--- a/arch/ia64/kernel/sys_ia64.c
+++ b/arch/ia64/kernel/sys_ia64.c
@@ -170,6 +170,9 @@ ia64_mremap (unsigned long addr, unsigned long old_len, unsigned long new_len, u
asmlinkage long
ia64_clock_getres(const clockid_t which_clock, struct __kernel_timespec __user *tp)
{
+ struct timespec64 rtn_tp;
+ s64 tick_ns;
+
/*
* ia64's clock_gettime() syscall is implemented as a vdso call
* fsys_clock_gettime(). Currently it handles only
@@ -185,8 +188,8 @@ ia64_clock_getres(const clockid_t which_clock, struct __kernel_timespec __user *
switch (which_clock) {
case CLOCK_REALTIME:
case CLOCK_MONOTONIC:
- s64 tick_ns = DIV_ROUND_UP(NSEC_PER_SEC, local_cpu_data->itc_freq);
- struct timespec64 rtn_tp = ns_to_timespec64(tick_ns);
+ tick_ns = DIV_ROUND_UP(NSEC_PER_SEC, local_cpu_data->itc_freq);
+ rtn_tp = ns_to_timespec64(tick_ns);
return put_timespec64(&rtn_tp, tp);
}
diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h
index 90f9d3399b2a..3418d32d4fc7 100644
--- a/arch/loongarch/include/asm/ftrace.h
+++ b/arch/loongarch/include/asm/ftrace.h
@@ -10,8 +10,6 @@
#define FTRACE_REGS_PLT_IDX 1
#define NR_FTRACE_PLTS 2
-#define GRAPH_FAKE_OFFSET (sizeof(struct pt_regs) - offsetof(struct pt_regs, regs[1]))
-
#ifdef CONFIG_FUNCTION_TRACER
#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h
index c00e1512d4fa..7eedd83fd0d7 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h
@@ -377,14 +377,6 @@ static inline bool unsigned_imm_check(unsigned long val, unsigned int bit)
return val < (1UL << bit);
}
-static inline unsigned long sign_extend(unsigned long val, unsigned int idx)
-{
- if (!is_imm_negative(val, idx + 1))
- return ((1UL << idx) - 1) & val;
- else
- return ~((1UL << idx) - 1) | val;
-}
-
#define DEF_EMIT_REG0I26_FORMAT(NAME, OP) \
static inline void emit_##NAME(union loongarch_instruction *insn, \
int offset) \
@@ -401,6 +393,7 @@ static inline void emit_##NAME(union loongarch_instruction *insn, \
}
DEF_EMIT_REG0I26_FORMAT(b, b_op)
+DEF_EMIT_REG0I26_FORMAT(bl, bl_op)
#define DEF_EMIT_REG1I20_FORMAT(NAME, OP) \
static inline void emit_##NAME(union loongarch_instruction *insn, \
diff --git a/arch/loongarch/include/asm/unwind.h b/arch/loongarch/include/asm/unwind.h
index f2b52b9ea93d..b9dce87afd2e 100644
--- a/arch/loongarch/include/asm/unwind.h
+++ b/arch/loongarch/include/asm/unwind.h
@@ -8,7 +8,9 @@
#define _ASM_UNWIND_H
#include <linux/sched.h>
+#include <linux/ftrace.h>
+#include <asm/ptrace.h>
#include <asm/stacktrace.h>
enum unwinder_type {
@@ -20,11 +22,13 @@ struct unwind_state {
char type; /* UNWINDER_XXX */
struct stack_info stack_info;
struct task_struct *task;
- bool first, error, is_ftrace;
+ bool first, error, reset;
int graph_idx;
unsigned long sp, pc, ra;
};
+bool default_next_frame(struct unwind_state *state);
+
void unwind_start(struct unwind_state *state,
struct task_struct *task, struct pt_regs *regs);
bool unwind_next_frame(struct unwind_state *state);
@@ -40,4 +44,39 @@ static inline bool unwind_error(struct unwind_state *state)
return state->error;
}
+#define GRAPH_FAKE_OFFSET (sizeof(struct pt_regs) - offsetof(struct pt_regs, regs[1]))
+
+static inline unsigned long unwind_graph_addr(struct unwind_state *state,
+ unsigned long pc, unsigned long cfa)
+{
+ return ftrace_graph_ret_addr(state->task, &state->graph_idx,
+ pc, (unsigned long *)(cfa - GRAPH_FAKE_OFFSET));
+}
+
+static __always_inline void __unwind_start(struct unwind_state *state,
+ struct task_struct *task, struct pt_regs *regs)
+{
+ memset(state, 0, sizeof(*state));
+ if (regs) {
+ state->sp = regs->regs[3];
+ state->pc = regs->csr_era;
+ state->ra = regs->regs[1];
+ } else if (task && task != current) {
+ state->sp = thread_saved_fp(task);
+ state->pc = thread_saved_ra(task);
+ state->ra = 0;
+ } else {
+ state->sp = (unsigned long)__builtin_frame_address(0);
+ state->pc = (unsigned long)__builtin_return_address(0);
+ state->ra = 0;
+ }
+ state->task = task;
+ get_stack_info(state->sp, state->task, &state->stack_info);
+ state->pc = unwind_graph_addr(state, state->pc, state->sp);
+}
+
+static __always_inline unsigned long __unwind_get_return_address(struct unwind_state *state)
+{
+ return unwind_done(state) ? 0 : state->pc;
+}
#endif /* _ASM_UNWIND_H */
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index fcaa024a685e..c8cfbd562921 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -8,7 +8,7 @@ extra-y := vmlinux.lds
obj-y += head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \
traps.o irq.o idle.o process.o dma.o mem.o io.o reset.o switch.o \
elf.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o \
- alternative.o unaligned.o
+ alternative.o unaligned.o unwind.o
obj-$(CONFIG_ACPI) += acpi.o
obj-$(CONFIG_EFI) += efi.o
diff --git a/arch/loongarch/kernel/alternative.c b/arch/loongarch/kernel/alternative.c
index c5aebeac960b..4ad13847e962 100644
--- a/arch/loongarch/kernel/alternative.c
+++ b/arch/loongarch/kernel/alternative.c
@@ -74,7 +74,7 @@ static void __init_or_module recompute_jump(union loongarch_instruction *buf,
switch (src->reg0i26_format.opcode) {
case b_op:
case bl_op:
- jump_addr = cur_pc + sign_extend((si_h << 16 | si_l) << 2, 27);
+ jump_addr = cur_pc + sign_extend64((si_h << 16 | si_l) << 2, 27);
if (in_alt_jump(jump_addr, start, end))
return;
offset = jump_addr - pc;
@@ -93,7 +93,7 @@ static void __init_or_module recompute_jump(union loongarch_instruction *buf,
fallthrough;
case beqz_op:
case bnez_op:
- jump_addr = cur_pc + sign_extend((si_h << 16 | si_l) << 2, 22);
+ jump_addr = cur_pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
if (in_alt_jump(jump_addr, start, end))
return;
offset = jump_addr - pc;
@@ -112,7 +112,7 @@ static void __init_or_module recompute_jump(union loongarch_instruction *buf,
case bge_op:
case bltu_op:
case bgeu_op:
- jump_addr = cur_pc + sign_extend(si << 2, 17);
+ jump_addr = cur_pc + sign_extend64(si << 2, 17);
if (in_alt_jump(jump_addr, start, end))
return;
offset = jump_addr - pc;
diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c
index 255a09876ef2..3a3fce2d7846 100644
--- a/arch/loongarch/kernel/cpu-probe.c
+++ b/arch/loongarch/kernel/cpu-probe.c
@@ -94,7 +94,7 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c)
c->options = LOONGARCH_CPU_CPUCFG | LOONGARCH_CPU_CSR |
LOONGARCH_CPU_TLB | LOONGARCH_CPU_VINT | LOONGARCH_CPU_WATCH;
- elf_hwcap |= HWCAP_LOONGARCH_CRC32;
+ elf_hwcap = HWCAP_LOONGARCH_CPUCFG | HWCAP_LOONGARCH_CRC32;
config = read_cpucfg(LOONGARCH_CPUCFG1);
if (config & CPUCFG1_UAL) {
diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S
index 75e5be807a0d..7e5c293ed89f 100644
--- a/arch/loongarch/kernel/genex.S
+++ b/arch/loongarch/kernel/genex.S
@@ -67,14 +67,17 @@ SYM_FUNC_END(except_vec_cex)
.macro BUILD_HANDLER exception handler prep
.align 5
SYM_FUNC_START(handle_\exception)
+ 666:
BACKUP_T0T1
SAVE_ALL
build_prep_\prep
move a0, sp
la.abs t0, do_\handler
jirl ra, t0, 0
+ 668:
RESTORE_ALL_AND_RET
SYM_FUNC_END(handle_\exception)
+ SYM_DATA(unwind_hint_\exception, .word 668b - 666b)
.endm
BUILD_HANDLER ade ade badv
diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c
index 512579d79b22..badc59087042 100644
--- a/arch/loongarch/kernel/inst.c
+++ b/arch/loongarch/kernel/inst.c
@@ -58,7 +58,6 @@ u32 larch_insn_gen_nop(void)
u32 larch_insn_gen_b(unsigned long pc, unsigned long dest)
{
long offset = dest - pc;
- unsigned int immediate_l, immediate_h;
union loongarch_instruction insn;
if ((offset & 3) || offset < -SZ_128M || offset >= SZ_128M) {
@@ -66,15 +65,7 @@ u32 larch_insn_gen_b(unsigned long pc, unsigned long dest)
return INSN_BREAK;
}
- offset >>= 2;
-
- immediate_l = offset & 0xffff;
- offset >>= 16;
- immediate_h = offset & 0x3ff;
-
- insn.reg0i26_format.opcode = b_op;
- insn.reg0i26_format.immediate_l = immediate_l;
- insn.reg0i26_format.immediate_h = immediate_h;
+ emit_b(&insn, offset >> 2);
return insn.word;
}
@@ -82,7 +73,6 @@ u32 larch_insn_gen_b(unsigned long pc, unsigned long dest)
u32 larch_insn_gen_bl(unsigned long pc, unsigned long dest)
{
long offset = dest - pc;
- unsigned int immediate_l, immediate_h;
union loongarch_instruction insn;
if ((offset & 3) || offset < -SZ_128M || offset >= SZ_128M) {
@@ -90,15 +80,7 @@ u32 larch_insn_gen_bl(unsigned long pc, unsigned long dest)
return INSN_BREAK;
}
- offset >>= 2;
-
- immediate_l = offset & 0xffff;
- offset >>= 16;
- immediate_h = offset & 0x3ff;
-
- insn.reg0i26_format.opcode = bl_op;
- insn.reg0i26_format.immediate_l = immediate_l;
- insn.reg0i26_format.immediate_h = immediate_h;
+ emit_bl(&insn, offset >> 2);
return insn.word;
}
@@ -107,10 +89,7 @@ u32 larch_insn_gen_or(enum loongarch_gpr rd, enum loongarch_gpr rj, enum loongar
{
union loongarch_instruction insn;
- insn.reg3_format.opcode = or_op;
- insn.reg3_format.rd = rd;
- insn.reg3_format.rj = rj;
- insn.reg3_format.rk = rk;
+ emit_or(&insn, rd, rj, rk);
return insn.word;
}
@@ -124,9 +103,7 @@ u32 larch_insn_gen_lu12iw(enum loongarch_gpr rd, int imm)
{
union loongarch_instruction insn;
- insn.reg1i20_format.opcode = lu12iw_op;
- insn.reg1i20_format.rd = rd;
- insn.reg1i20_format.immediate = imm;
+ emit_lu12iw(&insn, rd, imm);
return insn.word;
}
@@ -135,9 +112,7 @@ u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm)
{
union loongarch_instruction insn;
- insn.reg1i20_format.opcode = lu32id_op;
- insn.reg1i20_format.rd = rd;
- insn.reg1i20_format.immediate = imm;
+ emit_lu32id(&insn, rd, imm);
return insn.word;
}
@@ -146,10 +121,7 @@ u32 larch_insn_gen_lu52id(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm)
{
union loongarch_instruction insn;
- insn.reg2i12_format.opcode = lu52id_op;
- insn.reg2i12_format.rd = rd;
- insn.reg2i12_format.rj = rj;
- insn.reg2i12_format.immediate = imm;
+ emit_lu52id(&insn, rd, rj, imm);
return insn.word;
}
@@ -158,10 +130,7 @@ u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, unsigned l
{
union loongarch_instruction insn;
- insn.reg2i16_format.opcode = jirl_op;
- insn.reg2i16_format.rd = rd;
- insn.reg2i16_format.rj = rj;
- insn.reg2i16_format.immediate = (dest - pc) >> 2;
+ emit_jirl(&insn, rj, rd, (dest - pc) >> 2);
return insn.word;
}
diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c
index c583b1ef1f44..edfd220a3737 100644
--- a/arch/loongarch/kernel/process.c
+++ b/arch/loongarch/kernel/process.c
@@ -191,20 +191,14 @@ out:
unsigned long __get_wchan(struct task_struct *task)
{
- unsigned long pc;
+ unsigned long pc = 0;
struct unwind_state state;
if (!try_get_task_stack(task))
return 0;
- unwind_start(&state, task, NULL);
- state.sp = thread_saved_fp(task);
- get_stack_info(state.sp, state.task, &state.stack_info);
- state.pc = thread_saved_ra(task);
-#ifdef CONFIG_UNWINDER_PROLOGUE
- state.type = UNWINDER_PROLOGUE;
-#endif
- for (; !unwind_done(&state); unwind_next_frame(&state)) {
+ for (unwind_start(&state, task, NULL);
+ !unwind_done(&state); unwind_next_frame(&state)) {
pc = unwind_get_return_address(&state);
if (!pc)
break;
diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
index 7ea62faeeadb..c38a146a973b 100644
--- a/arch/loongarch/kernel/traps.c
+++ b/arch/loongarch/kernel/traps.c
@@ -72,9 +72,6 @@ static void show_backtrace(struct task_struct *task, const struct pt_regs *regs,
if (!task)
task = current;
- if (user_mode(regs))
- state.type = UNWINDER_GUESS;
-
printk("%sCall Trace:", loglvl);
for (unwind_start(&state, task, pregs);
!unwind_done(&state); unwind_next_frame(&state)) {
diff --git a/arch/loongarch/kernel/unwind.c b/arch/loongarch/kernel/unwind.c
new file mode 100644
index 000000000000..a463d6961344
--- /dev/null
+++ b/arch/loongarch/kernel/unwind.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2023 Loongson Technology Corporation Limited
+ */
+#include <linux/kernel.h>
+#include <linux/ftrace.h>
+
+#include <asm/unwind.h>
+
+bool default_next_frame(struct unwind_state *state)
+{
+ struct stack_info *info = &state->stack_info;
+ unsigned long addr;
+
+ if (unwind_done(state))
+ return false;
+
+ do {
+ for (state->sp += sizeof(unsigned long);
+ state->sp < info->end; state->sp += sizeof(unsigned long)) {
+ addr = *(unsigned long *)(state->sp);
+ state->pc = unwind_graph_addr(state, addr, state->sp + 8);
+ if (__kernel_text_address(state->pc))
+ return true;
+ }
+
+ state->sp = info->next_sp;
+
+ } while (!get_stack_info(state->sp, state->task, info));
+
+ return false;
+}
diff --git a/arch/loongarch/kernel/unwind_guess.c b/arch/loongarch/kernel/unwind_guess.c
index e2d2e4f3001f..98379b7d4147 100644
--- a/arch/loongarch/kernel/unwind_guess.c
+++ b/arch/loongarch/kernel/unwind_guess.c
@@ -2,37 +2,18 @@
/*
* Copyright (C) 2022 Loongson Technology Corporation Limited
*/
-#include <linux/kernel.h>
-#include <linux/ftrace.h>
-
#include <asm/unwind.h>
unsigned long unwind_get_return_address(struct unwind_state *state)
{
- if (unwind_done(state))
- return 0;
- else if (state->first)
- return state->pc;
-
- return *(unsigned long *)(state->sp);
+ return __unwind_get_return_address(state);
}
EXPORT_SYMBOL_GPL(unwind_get_return_address);
void unwind_start(struct unwind_state *state, struct task_struct *task,
struct pt_regs *regs)
{
- memset(state, 0, sizeof(*state));
-
- if (regs) {
- state->sp = regs->regs[3];
- state->pc = regs->csr_era;
- }
-
- state->task = task;
- state->first = true;
-
- get_stack_info(state->sp, state->task, &state->stack_info);
-
+ __unwind_start(state, task, regs);
if (!unwind_done(state) && !__kernel_text_address(state->pc))
unwind_next_frame(state);
}
@@ -40,30 +21,6 @@ EXPORT_SYMBOL_GPL(unwind_start);
bool unwind_next_frame(struct unwind_state *state)
{
- struct stack_info *info = &state->stack_info;
- unsigned long addr;
-
- if (unwind_done(state))
- return false;
-
- if (state->first)
- state->first = false;
-
- do {
- for (state->sp += sizeof(unsigned long);
- state->sp < info->end;
- state->sp += sizeof(unsigned long)) {
- addr = *(unsigned long *)(state->sp);
- state->pc = ftrace_graph_ret_addr(state->task, &state->graph_idx,
- addr, (unsigned long *)(state->sp - GRAPH_FAKE_OFFSET));
- if (__kernel_text_address(addr))
- return true;
- }
-
- state->sp = info->next_sp;
-
- } while (!get_stack_info(state->sp, state->task, info));
-
- return false;
+ return default_next_frame(state);
}
EXPORT_SYMBOL_GPL(unwind_next_frame);
diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c
index 0f8d1451ebb8..9095fde8e55d 100644
--- a/arch/loongarch/kernel/unwind_prologue.c
+++ b/arch/loongarch/kernel/unwind_prologue.c
@@ -2,61 +2,116 @@
/*
* Copyright (C) 2022 Loongson Technology Corporation Limited
*/
+#include <linux/cpumask.h>
#include <linux/ftrace.h>
#include <linux/kallsyms.h>
#include <asm/inst.h>
+#include <asm/loongson.h>
#include <asm/ptrace.h>
+#include <asm/setup.h>
#include <asm/unwind.h>
-static inline void unwind_state_fixup(struct unwind_state *state)
-{
-#ifdef CONFIG_DYNAMIC_FTRACE
- static unsigned long ftrace = (unsigned long)ftrace_call + 4;
-
- if (state->pc == ftrace)
- state->is_ftrace = true;
+extern const int unwind_hint_ade;
+extern const int unwind_hint_ale;
+extern const int unwind_hint_bp;
+extern const int unwind_hint_fpe;
+extern const int unwind_hint_fpu;
+extern const int unwind_hint_lsx;
+extern const int unwind_hint_lasx;
+extern const int unwind_hint_lbt;
+extern const int unwind_hint_ri;
+extern const int unwind_hint_watch;
+extern unsigned long eentry;
+#ifdef CONFIG_NUMA
+extern unsigned long pcpu_handlers[NR_CPUS];
#endif
-}
-unsigned long unwind_get_return_address(struct unwind_state *state)
+static inline bool scan_handlers(unsigned long entry_offset)
{
+ int idx, offset;
- if (unwind_done(state))
- return 0;
- else if (state->type)
- return state->pc;
- else if (state->first)
- return state->pc;
-
- return *(unsigned long *)(state->sp);
+ if (entry_offset >= EXCCODE_INT_START * VECSIZE)
+ return false;
+ idx = entry_offset / VECSIZE;
+ offset = entry_offset % VECSIZE;
+ switch (idx) {
+ case EXCCODE_ADE:
+ return offset == unwind_hint_ade;
+ case EXCCODE_ALE:
+ return offset == unwind_hint_ale;
+ case EXCCODE_BP:
+ return offset == unwind_hint_bp;
+ case EXCCODE_FPE:
+ return offset == unwind_hint_fpe;
+ case EXCCODE_FPDIS:
+ return offset == unwind_hint_fpu;
+ case EXCCODE_LSXDIS:
+ return offset == unwind_hint_lsx;
+ case EXCCODE_LASXDIS:
+ return offset == unwind_hint_lasx;
+ case EXCCODE_BTDIS:
+ return offset == unwind_hint_lbt;
+ case EXCCODE_INE:
+ return offset == unwind_hint_ri;
+ case EXCCODE_WATCH:
+ return offset == unwind_hint_watch;
+ default:
+ return false;
+ }
}
-EXPORT_SYMBOL_GPL(unwind_get_return_address);
-static bool unwind_by_guess(struct unwind_state *state)
+static inline bool fix_exception(unsigned long pc)
{
- struct stack_info *info = &state->stack_info;
- unsigned long addr;
-
- for (state->sp += sizeof(unsigned long);
- state->sp < info->end;
- state->sp += sizeof(unsigned long)) {
- addr = *(unsigned long *)(state->sp);
- state->pc = ftrace_graph_ret_addr(state->task, &state->graph_idx,
- addr, (unsigned long *)(state->sp - GRAPH_FAKE_OFFSET));
- if (__kernel_text_address(addr))
+#ifdef CONFIG_NUMA
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ if (!pcpu_handlers[cpu])
+ continue;
+ if (scan_handlers(pc - pcpu_handlers[cpu]))
return true;
}
+#endif
+ return scan_handlers(pc - eentry);
+}
+/*
+ * As we meet ftrace_regs_entry, reset first flag like first doing
+ * tracing. Prologue analysis will stop soon because PC is at entry.
+ */
+static inline bool fix_ftrace(unsigned long pc)
+{
+#ifdef CONFIG_DYNAMIC_FTRACE
+ return pc == (unsigned long)ftrace_call + LOONGARCH_INSN_SIZE;
+#else
return false;
+#endif
}
+static inline bool unwind_state_fixup(struct unwind_state *state)
+{
+ if (!fix_exception(state->pc) && !fix_ftrace(state->pc))
+ return false;
+
+ state->reset = true;
+ return true;
+}
+
+/*
+ * LoongArch function prologue is like follows,
+ * [instructions not use stack var]
+ * addi.d sp, sp, -imm
+ * st.d xx, sp, offset <- save callee saved regs and
+ * st.d yy, sp, offset save ra if function is nest.
+ * [others instructions]
+ */
static bool unwind_by_prologue(struct unwind_state *state)
{
long frame_ra = -1;
unsigned long frame_size = 0;
- unsigned long size, offset, pc = state->pc;
+ unsigned long size, offset, pc;
struct pt_regs *regs;
struct stack_info *info = &state->stack_info;
union loongarch_instruction *ip, *ip_end;
@@ -64,20 +119,21 @@ static bool unwind_by_prologue(struct unwind_state *state)
if (state->sp >= info->end || state->sp < info->begin)
return false;
- if (state->is_ftrace) {
- /*
- * As we meet ftrace_regs_entry, reset first flag like first doing
- * tracing. Prologue analysis will stop soon because PC is at entry.
- */
+ if (state->reset) {
regs = (struct pt_regs *)state->sp;
state->first = true;
- state->is_ftrace = false;
+ state->reset = false;
state->pc = regs->csr_era;
state->ra = regs->regs[1];
state->sp = regs->regs[3];
return true;
}
+ /*
+ * When first is not set, the PC is a return address in the previous frame.
+ * We need to adjust its value in case overflow to the next symbol.
+ */
+ pc = state->pc - (state->first ? 0 : LOONGARCH_INSN_SIZE);
if (!kallsyms_lookup_size_offset(pc, &size, &offset))
return false;
@@ -93,6 +149,10 @@ static bool unwind_by_prologue(struct unwind_state *state)
ip++;
}
+ /*
+ * Can't find stack alloc action, PC may be in a leaf function. Only the
+ * first being true is reasonable, otherwise indicate analysis is broken.
+ */
if (!frame_size) {
if (state->first)
goto first;
@@ -110,6 +170,7 @@ static bool unwind_by_prologue(struct unwind_state *state)
ip++;
}
+ /* Can't find save $ra action, PC may be in a leaf function, too. */
if (frame_ra < 0) {
if (state->first) {
state->sp = state->sp + frame_size;
@@ -118,88 +179,47 @@ static bool unwind_by_prologue(struct unwind_state *state)
return false;
}
- if (state->first)
- state->first = false;
-
state->pc = *(unsigned long *)(state->sp + frame_ra);
state->sp = state->sp + frame_size;
goto out;
first:
- state->first = false;
- if (state->pc == state->ra)
- return false;
-
state->pc = state->ra;
out:
- unwind_state_fixup(state);
- return !!__kernel_text_address(state->pc);
-}
-
-void unwind_start(struct unwind_state *state, struct task_struct *task,
- struct pt_regs *regs)
-{
- memset(state, 0, sizeof(*state));
-
- if (regs && __kernel_text_address(regs->csr_era)) {
- state->pc = regs->csr_era;
- state->sp = regs->regs[3];
- state->ra = regs->regs[1];
- state->type = UNWINDER_PROLOGUE;
- }
-
- state->task = task;
- state->first = true;
-
- get_stack_info(state->sp, state->task, &state->stack_info);
-
- if (!unwind_done(state) && !__kernel_text_address(state->pc))
- unwind_next_frame(state);
+ state->first = false;
+ return unwind_state_fixup(state) || __kernel_text_address(state->pc);
}
-EXPORT_SYMBOL_GPL(unwind_start);
-bool unwind_next_frame(struct unwind_state *state)
+static bool next_frame(struct unwind_state *state)
{
- struct stack_info *info = &state->stack_info;
- struct pt_regs *regs;
unsigned long pc;
+ struct pt_regs *regs;
+ struct stack_info *info = &state->stack_info;
if (unwind_done(state))
return false;
do {
- switch (state->type) {
- case UNWINDER_GUESS:
- state->first = false;
- if (unwind_by_guess(state))
- return true;
- break;
+ if (unwind_by_prologue(state)) {
+ state->pc = unwind_graph_addr(state, state->pc, state->sp);
+ return true;
+ }
+
+ if (info->type == STACK_TYPE_IRQ && info->end == state->sp) {
+ regs = (struct pt_regs *)info->next_sp;
+ pc = regs->csr_era;
+
+ if (user_mode(regs) || !__kernel_text_address(pc))
+ return false;
+
+ state->first = true;
+ state->pc = pc;
+ state->ra = regs->regs[1];
+ state->sp = regs->regs[3];
+ get_stack_info(state->sp, state->task, info);
- case UNWINDER_PROLOGUE:
- if (unwind_by_prologue(state)) {
- state->pc = ftrace_graph_ret_addr(state->task, &state->graph_idx,
- state->pc, (unsigned long *)(state->sp - GRAPH_FAKE_OFFSET));
- return true;
- }
-
- if (info->type == STACK_TYPE_IRQ &&
- info->end == state->sp) {
- regs = (struct pt_regs *)info->next_sp;
- pc = regs->csr_era;
-
- if (user_mode(regs) || !__kernel_text_address(pc))
- return false;
-
- state->first = true;
- state->ra = regs->regs[1];
- state->sp = regs->regs[3];
- state->pc = ftrace_graph_ret_addr(state->task, &state->graph_idx,
- pc, (unsigned long *)(state->sp - GRAPH_FAKE_OFFSET));
- get_stack_info(state->sp, state->task, info);
-
- return true;
- }
+ return true;
}
state->sp = info->next_sp;
@@ -208,4 +228,36 @@ bool unwind_next_frame(struct unwind_state *state)
return false;
}
+
+unsigned long unwind_get_return_address(struct unwind_state *state)
+{
+ return __unwind_get_return_address(state);
+}
+EXPORT_SYMBOL_GPL(unwind_get_return_address);
+
+void unwind_start(struct unwind_state *state, struct task_struct *task,
+ struct pt_regs *regs)
+{
+ __unwind_start(state, task, regs);
+ state->type = UNWINDER_PROLOGUE;
+ state->first = true;
+
+ /*
+ * The current PC is not kernel text address, we cannot find its
+ * relative symbol. Thus, prologue analysis will be broken. Luckily,
+ * we can use the default_next_frame().
+ */
+ if (!__kernel_text_address(state->pc)) {
+ state->type = UNWINDER_GUESS;
+ if (!unwind_done(state))
+ unwind_next_frame(state);
+ }
+}
+EXPORT_SYMBOL_GPL(unwind_start);
+
+bool unwind_next_frame(struct unwind_state *state)
+{
+ return state->type == UNWINDER_PROLOGUE ?
+ next_frame(state) : default_next_frame(state);
+}
EXPORT_SYMBOL_GPL(unwind_next_frame);
diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c
index da3681f131c8..8bad6b0cff59 100644
--- a/arch/loongarch/mm/tlb.c
+++ b/arch/loongarch/mm/tlb.c
@@ -251,7 +251,7 @@ static void output_pgtable_bits_defines(void)
}
#ifdef CONFIG_NUMA
-static unsigned long pcpu_handlers[NR_CPUS];
+unsigned long pcpu_handlers[NR_CPUS];
#endif
extern long exception_handlers[VECSIZE * 128 / sizeof(long)];
diff --git a/arch/mips/ralink/of.c b/arch/mips/ralink/of.c
index 01c132bc33d5..4d06de77d92a 100644
--- a/arch/mips/ralink/of.c
+++ b/arch/mips/ralink/of.c
@@ -64,7 +64,7 @@ void __init plat_mem_setup(void)
dtb = get_fdt();
__dt_setup_arch(dtb);
- if (!early_init_dt_scan_memory())
+ if (early_init_dt_scan_memory())
return;
if (soc_info.mem_detect)
diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c
index 4dfe1f49c5c8..6817892a2c58 100644
--- a/arch/parisc/kernel/firmware.c
+++ b/arch/parisc/kernel/firmware.c
@@ -1303,7 +1303,7 @@ static char iodc_dbuf[4096] __page_aligned_bss;
*/
int pdc_iodc_print(const unsigned char *str, unsigned count)
{
- unsigned int i;
+ unsigned int i, found = 0;
unsigned long flags;
count = min_t(unsigned int, count, sizeof(iodc_dbuf));
@@ -1315,6 +1315,7 @@ int pdc_iodc_print(const unsigned char *str, unsigned count)
iodc_dbuf[i+0] = '\r';
iodc_dbuf[i+1] = '\n';
i += 2;
+ found = 1;
goto print;
default:
iodc_dbuf[i] = str[i];
@@ -1330,7 +1331,7 @@ print:
__pa(pdc_result), 0, __pa(iodc_dbuf), i, 0);
spin_unlock_irqrestore(&pdc_lock, flags);
- return i;
+ return i - found;
}
#if !defined(BOOTLOADER)
diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c
index 69c62933e952..ceb45f51d52e 100644
--- a/arch/parisc/kernel/ptrace.c
+++ b/arch/parisc/kernel/ptrace.c
@@ -126,6 +126,12 @@ long arch_ptrace(struct task_struct *child, long request,
unsigned long tmp;
long ret = -EIO;
+ unsigned long user_regs_struct_size = sizeof(struct user_regs_struct);
+#ifdef CONFIG_64BIT
+ if (is_compat_task())
+ user_regs_struct_size /= 2;
+#endif
+
switch (request) {
/* Read the word at location addr in the USER area. For ptraced
@@ -166,7 +172,7 @@ long arch_ptrace(struct task_struct *child, long request,
addr >= sizeof(struct pt_regs))
break;
if (addr == PT_IAOQ0 || addr == PT_IAOQ1) {
- data |= 3; /* ensure userspace privilege */
+ data |= PRIV_USER; /* ensure userspace privilege */
}
if ((addr >= PT_GR1 && addr <= PT_GR31) ||
addr == PT_IAOQ0 || addr == PT_IAOQ1 ||
@@ -181,14 +187,14 @@ long arch_ptrace(struct task_struct *child, long request,
return copy_regset_to_user(child,
task_user_regset_view(current),
REGSET_GENERAL,
- 0, sizeof(struct user_regs_struct),
+ 0, user_regs_struct_size,
datap);
case PTRACE_SETREGS: /* Set all gp regs in the child. */
return copy_regset_from_user(child,
task_user_regset_view(current),
REGSET_GENERAL,
- 0, sizeof(struct user_regs_struct),
+ 0, user_regs_struct_size,
datap);
case PTRACE_GETFPREGS: /* Get the child FPU state. */
@@ -285,7 +291,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
if (addr >= sizeof(struct pt_regs))
break;
if (addr == PT_IAOQ0+4 || addr == PT_IAOQ1+4) {
- data |= 3; /* ensure userspace privilege */
+ data |= PRIV_USER; /* ensure userspace privilege */
}
if (addr >= PT_FR0 && addr <= PT_FR31 + 4) {
/* Special case, fp regs are 64 bits anyway */
@@ -302,6 +308,11 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
}
}
break;
+ case PTRACE_GETREGS:
+ case PTRACE_SETREGS:
+ case PTRACE_GETFPREGS:
+ case PTRACE_SETFPREGS:
+ return arch_ptrace(child, request, addr, data);
default:
ret = compat_ptrace_request(child, request, addr, data);
@@ -484,7 +495,7 @@ static void set_reg(struct pt_regs *regs, int num, unsigned long val)
case RI(iaoq[0]):
case RI(iaoq[1]):
/* set 2 lowest bits to ensure userspace privilege: */
- regs->iaoq[num - RI(iaoq[0])] = val | 3;
+ regs->iaoq[num - RI(iaoq[0])] = val | PRIV_USER;
return;
case RI(sar): regs->sar = val;
return;
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index b8c4ac56bddc..7a5f8dbfbdd0 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -163,7 +163,6 @@ config PPC
select ARCH_WANT_IRQS_OFF_ACTIVATE_MM
select ARCH_WANT_LD_ORPHAN_WARN
select ARCH_WANTS_MODULES_DATA_IN_VMALLOC if PPC_BOOK3S_32 || PPC_8xx
- select ARCH_WANTS_NO_INSTR
select ARCH_WEAK_RELEASE_ACQUIRE
select BINFMT_ELF
select BUILDTIME_TABLE_SORT
diff --git a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi
index 74e17e134387..27714dc2f04a 100644
--- a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi
@@ -659,3 +659,19 @@
interrupts = <16 2 1 9>;
};
};
+
+&fman0_rx_0x08 {
+ /delete-property/ fsl,fman-10g-port;
+};
+
+&fman0_tx_0x28 {
+ /delete-property/ fsl,fman-10g-port;
+};
+
+&fman0_rx_0x09 {
+ /delete-property/ fsl,fman-10g-port;
+};
+
+&fman0_tx_0x29 {
+ /delete-property/ fsl,fman-10g-port;
+};
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index af04cea82b94..352d7de24018 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -210,6 +210,10 @@ ld_version()
gsub(".*version ", "");
gsub("-.*", "");
split($1,a, ".");
+ if( length(a[3]) == "8" )
+ # a[3] is probably a date of format yyyymmdd used for release snapshots. We
+ # can assume it to be zero as it does not signify a new version as such.
+ a[3] = 0;
print a[1]*100000000 + a[2]*1000000 + a[3]*10000;
exit
}'
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index dd39313242b4..2bbc0fcce04a 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -97,6 +97,8 @@ static inline void tlb_flush(struct mmu_gather *tlb)
{
if (radix_enabled())
radix__tlb_flush(tlb);
+ else
+ hash__tlb_flush(tlb);
}
#ifdef CONFIG_SMP
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 77fa88c2aed0..eb6d094083fd 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -173,6 +173,15 @@ static inline notrace unsigned long irq_soft_mask_or_return(unsigned long mask)
return flags;
}
+static inline notrace unsigned long irq_soft_mask_andc_return(unsigned long mask)
+{
+ unsigned long flags = irq_soft_mask_return();
+
+ irq_soft_mask_set(flags & ~mask);
+
+ return flags;
+}
+
static inline unsigned long arch_local_save_flags(void)
{
return irq_soft_mask_return();
@@ -192,7 +201,7 @@ static inline void arch_local_irq_enable(void)
static inline unsigned long arch_local_irq_save(void)
{
- return irq_soft_mask_set_return(IRQS_DISABLED);
+ return irq_soft_mask_or_return(IRQS_DISABLED);
}
static inline bool arch_irqs_disabled_flags(unsigned long flags)
@@ -331,10 +340,11 @@ bool power_pmu_wants_prompt_pmi(void);
* is a different soft-masked interrupt pending that requires hard
* masking.
*/
-static inline bool should_hard_irq_enable(void)
+static inline bool should_hard_irq_enable(struct pt_regs *regs)
{
if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) {
- WARN_ON(irq_soft_mask_return() == IRQS_ENABLED);
+ WARN_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
+ WARN_ON(!(get_paca()->irq_happened & PACA_IRQ_HARD_DIS));
WARN_ON(mfmsr() & MSR_EE);
}
@@ -347,8 +357,17 @@ static inline bool should_hard_irq_enable(void)
*
* TODO: Add test for 64e
*/
- if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !power_pmu_wants_prompt_pmi())
- return false;
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) {
+ if (!power_pmu_wants_prompt_pmi())
+ return false;
+ /*
+ * If PMIs are disabled then IRQs should be disabled as well,
+ * so we shouldn't see this condition, check for it just in
+ * case because we are about to enable PMIs.
+ */
+ if (WARN_ON_ONCE(regs->softe & IRQS_PMI_DISABLED))
+ return false;
+ }
if (get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK)
return false;
@@ -358,18 +377,16 @@ static inline bool should_hard_irq_enable(void)
/*
* Do the hard enabling, only call this if should_hard_irq_enable is true.
+ * This allows PMI interrupts to profile irq handlers.
*/
static inline void do_hard_irq_enable(void)
{
- if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) {
- WARN_ON(irq_soft_mask_return() == IRQS_ENABLED);
- WARN_ON(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK);
- WARN_ON(mfmsr() & MSR_EE);
- }
/*
- * This allows PMI interrupts (and watchdog soft-NMIs) through.
- * There is no other reason to enable this way.
+ * Asynch interrupts come in with IRQS_ALL_DISABLED,
+ * PACA_IRQ_HARD_DIS, and MSR[EE]=0.
*/
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+ irq_soft_mask_andc_return(IRQS_PMI_DISABLED);
get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS;
__hard_irq_enable();
}
@@ -452,7 +469,7 @@ static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
return !(regs->msr & MSR_EE);
}
-static __always_inline bool should_hard_irq_enable(void)
+static __always_inline bool should_hard_irq_enable(struct pt_regs *regs)
{
return false;
}
diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h
index 4f897993b710..699a88584ae1 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -137,7 +137,7 @@ struct imc_pmu {
* are inited.
*/
struct imc_pmu_ref {
- struct mutex lock;
+ spinlock_t lock;
unsigned int id;
int refc;
};
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index f55c6fb34a3a..5712dd846263 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -27,7 +27,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception)
ppc_msgsync();
- if (should_hard_irq_enable())
+ if (should_hard_irq_enable(regs))
do_hard_irq_enable();
kvmppc_clear_host_ipi(smp_processor_id());
diff --git a/arch/powerpc/kernel/head_85xx.S b/arch/powerpc/kernel/head_85xx.S
index d438ca74e96c..fdbee1093e2b 100644
--- a/arch/powerpc/kernel/head_85xx.S
+++ b/arch/powerpc/kernel/head_85xx.S
@@ -864,7 +864,7 @@ _GLOBAL(load_up_spe)
* SPE unavailable trap from kernel - print a message, but let
* the task use SPE in the kernel until it returns to user mode.
*/
-KernelSPE:
+SYM_FUNC_START_LOCAL(KernelSPE)
lwz r3,_MSR(r1)
oris r3,r3,MSR_SPE@h
stw r3,_MSR(r1) /* enable use of SPE after return */
@@ -881,6 +881,7 @@ KernelSPE:
#endif
.align 4,0
+SYM_FUNC_END(KernelSPE)
#endif /* CONFIG_SPE */
/*
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index fc6631a80527..0ec1581619db 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -50,16 +50,18 @@ static inline bool exit_must_hard_disable(void)
*/
static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable)
{
+ bool must_hard_disable = (exit_must_hard_disable() || !restartable);
+
/* This must be done with RI=1 because tracing may touch vmaps */
trace_hardirqs_on();
- if (exit_must_hard_disable() || !restartable)
+ if (must_hard_disable)
__hard_EE_RI_disable();
#ifdef CONFIG_PPC64
/* This pattern matches prep_irq_for_idle */
if (unlikely(lazy_irq_pending_nocheck())) {
- if (exit_must_hard_disable() || !restartable) {
+ if (must_hard_disable) {
local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
__hard_RI_enable();
}
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index c5b9ce887483..c9535f2760b5 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -238,7 +238,7 @@ static void __do_irq(struct pt_regs *regs, unsigned long oldsp)
irq = static_call(ppc_get_irq)();
/* We can hard enable interrupts now to allow perf interrupts */
- if (should_hard_irq_enable())
+ if (should_hard_irq_enable(regs))
do_hard_irq_enable();
/* And finally process it */
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index d68de3618741..e26eb6618ae5 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -515,7 +515,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
}
/* Conditionally hard-enable interrupts. */
- if (should_hard_irq_enable()) {
+ if (should_hard_irq_enable(regs)) {
/*
* Ensure a positive value is written to the decrementer, or
* else some CPUs will continue to take decrementer exceptions.
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 8c3862b4c259..958e77a24f85 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -8,6 +8,7 @@
#define BSS_FIRST_SECTIONS *(.bss.prominit)
#define EMITS_PT_NOTE
#define RO_EXCEPTION_TABLE_ALIGN 0
+#define RUNTIME_DISCARD_EXIT
#define SOFT_MASK_TABLE(align) \
. = ALIGN(align); \
@@ -410,9 +411,12 @@ SECTIONS
DISCARDS
/DISCARD/ : {
*(*.EMB.apuinfo)
- *(.glink .iplt .plt .rela* .comment)
+ *(.glink .iplt .plt)
*(.gnu.version*)
*(.gnu.attributes)
*(.eh_frame)
+#ifndef CONFIG_RELOCATABLE
+ *(.rela*)
+#endif
}
}
diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c
index af8854f9eae3..9be3e818a240 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -26,6 +26,7 @@
#include <asm/firmware.h>
#include <asm/kexec_ranges.h>
#include <asm/crashdump-ppc64.h>
+#include <asm/mmzone.h>
#include <asm/prom.h>
struct umem_info {
@@ -989,10 +990,13 @@ unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image)
* linux,drconf-usable-memory properties. Get an approximate on the
* number of usable memory entries and use for FDT size estimation.
*/
- usm_entries = ((memblock_end_of_DRAM() / drmem_lmb_size()) +
- (2 * (resource_size(&crashk_res) / drmem_lmb_size())));
-
- extra_size = (unsigned int)(usm_entries * sizeof(u64));
+ if (drmem_lmb_size()) {
+ usm_entries = ((memory_hotplug_max() / drmem_lmb_size()) +
+ (2 * (resource_size(&crashk_res) / drmem_lmb_size())));
+ extra_size = (unsigned int)(usm_entries * sizeof(u64));
+ } else {
+ extra_size = 0;
+ }
/*
* Get the number of CPU nodes in the current DT. This allows to
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 0dce93ccaadf..e89281d3ba28 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -912,16 +912,15 @@ static int kvmppc_handle_debug(struct kvm_vcpu *vcpu)
static void kvmppc_fill_pt_regs(struct pt_regs *regs)
{
- ulong r1, ip, msr, lr;
+ ulong r1, msr, lr;
asm("mr %0, 1" : "=r"(r1));
asm("mflr %0" : "=r"(lr));
asm("mfmsr %0" : "=r"(msr));
- asm("bl 1f; 1: mflr %0" : "=r"(ip));
memset(regs, 0, sizeof(*regs));
regs->gpr[1] = r1;
- regs->nip = ip;
+ regs->nip = _THIS_IP_;
regs->msr = msr;
regs->link = lr;
}
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 80a148c57de8..44a35ed4f686 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1012,7 +1012,7 @@ static void __init hash_init_partition_table(phys_addr_t hash_table,
void hpt_clear_stress(void);
static struct timer_list stress_hpt_timer;
-void stress_hpt_timer_fn(struct timer_list *timer)
+static void stress_hpt_timer_fn(struct timer_list *timer)
{
int next_cpu;
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index cac727b01799..26245aaf12b8 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -234,6 +234,14 @@ void radix__mark_rodata_ro(void)
end = (unsigned long)__end_rodata;
radix__change_memory_range(start, end, _PAGE_WRITE);
+
+ for (start = PAGE_OFFSET; start < (unsigned long)_stext; start += PAGE_SIZE) {
+ end = start + PAGE_SIZE;
+ if (overlaps_interrupt_vector_text(start, end))
+ radix__change_memory_range(start, end, _PAGE_WRITE);
+ else
+ break;
+ }
}
void radix__mark_initmem_nx(void)
@@ -262,6 +270,22 @@ print_mapping(unsigned long start, unsigned long end, unsigned long size, bool e
static unsigned long next_boundary(unsigned long addr, unsigned long end)
{
#ifdef CONFIG_STRICT_KERNEL_RWX
+ unsigned long stext_phys;
+
+ stext_phys = __pa_symbol(_stext);
+
+ // Relocatable kernel running at non-zero real address
+ if (stext_phys != 0) {
+ // The end of interrupts code at zero is a rodata boundary
+ unsigned long end_intr = __pa_symbol(__end_interrupts) - stext_phys;
+ if (addr < end_intr)
+ return end_intr;
+
+ // Start of relocated kernel text is a rodata boundary
+ if (addr < stext_phys)
+ return stext_phys;
+ }
+
if (addr < __pa_symbol(__srwx_boundary))
return __pa_symbol(__srwx_boundary);
#endif
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index d517aba94d1b..9d229ef7f86e 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -14,6 +14,7 @@
#include <asm/cputhreads.h>
#include <asm/smp.h>
#include <linux/string.h>
+#include <linux/spinlock.h>
/* Nest IMC data structures and variables */
@@ -50,7 +51,7 @@ static int trace_imc_mem_size;
* core and trace-imc
*/
static struct imc_pmu_ref imc_global_refc = {
- .lock = __MUTEX_INITIALIZER(imc_global_refc.lock),
+ .lock = __SPIN_LOCK_INITIALIZER(imc_global_refc.lock),
.id = 0,
.refc = 0,
};
@@ -400,7 +401,7 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu)
get_hard_smp_processor_id(cpu));
/*
* If this is the last cpu in this chip then, skip the reference
- * count mutex lock and make the reference count on this chip zero.
+ * count lock and make the reference count on this chip zero.
*/
ref = get_nest_pmu_ref(cpu);
if (!ref)
@@ -462,15 +463,15 @@ static void nest_imc_counters_release(struct perf_event *event)
/*
* See if we need to disable the nest PMU.
* If no events are currently in use, then we have to take a
- * mutex to ensure that we don't race with another task doing
+ * lock to ensure that we don't race with another task doing
* enable or disable the nest counters.
*/
ref = get_nest_pmu_ref(event->cpu);
if (!ref)
return;
- /* Take the mutex lock for this node and then decrement the reference count */
- mutex_lock(&ref->lock);
+ /* Take the lock for this node and then decrement the reference count */
+ spin_lock(&ref->lock);
if (ref->refc == 0) {
/*
* The scenario where this is true is, when perf session is
@@ -482,7 +483,7 @@ static void nest_imc_counters_release(struct perf_event *event)
* an OPAL call to disable the engine in that node.
*
*/
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
return;
}
ref->refc--;
@@ -490,7 +491,7 @@ static void nest_imc_counters_release(struct perf_event *event)
rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
get_hard_smp_processor_id(event->cpu));
if (rc) {
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
pr_err("nest-imc: Unable to stop the counters for core %d\n", node_id);
return;
}
@@ -498,7 +499,7 @@ static void nest_imc_counters_release(struct perf_event *event)
WARN(1, "nest-imc: Invalid event reference count\n");
ref->refc = 0;
}
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
}
static int nest_imc_event_init(struct perf_event *event)
@@ -557,26 +558,25 @@ static int nest_imc_event_init(struct perf_event *event)
/*
* Get the imc_pmu_ref struct for this node.
- * Take the mutex lock and then increment the count of nest pmu events
- * inited.
+ * Take the lock and then increment the count of nest pmu events inited.
*/
ref = get_nest_pmu_ref(event->cpu);
if (!ref)
return -EINVAL;
- mutex_lock(&ref->lock);
+ spin_lock(&ref->lock);
if (ref->refc == 0) {
rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_NEST,
get_hard_smp_processor_id(event->cpu));
if (rc) {
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
pr_err("nest-imc: Unable to start the counters for node %d\n",
node_id);
return rc;
}
}
++ref->refc;
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
event->destroy = nest_imc_counters_release;
return 0;
@@ -612,9 +612,8 @@ static int core_imc_mem_init(int cpu, int size)
return -ENOMEM;
mem_info->vbase = page_address(page);
- /* Init the mutex */
core_imc_refc[core_id].id = core_id;
- mutex_init(&core_imc_refc[core_id].lock);
+ spin_lock_init(&core_imc_refc[core_id].lock);
rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_CORE,
__pa((void *)mem_info->vbase),
@@ -703,9 +702,8 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu)
perf_pmu_migrate_context(&core_imc_pmu->pmu, cpu, ncpu);
} else {
/*
- * If this is the last cpu in this core then, skip taking refernce
- * count mutex lock for this core and directly zero "refc" for
- * this core.
+ * If this is the last cpu in this core then skip taking reference
+ * count lock for this core and directly zero "refc" for this core.
*/
opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
get_hard_smp_processor_id(cpu));
@@ -720,11 +718,11 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu)
* last cpu in this core and core-imc event running
* in this cpu.
*/
- mutex_lock(&imc_global_refc.lock);
+ spin_lock(&imc_global_refc.lock);
if (imc_global_refc.id == IMC_DOMAIN_CORE)
imc_global_refc.refc--;
- mutex_unlock(&imc_global_refc.lock);
+ spin_unlock(&imc_global_refc.lock);
}
return 0;
}
@@ -739,7 +737,7 @@ static int core_imc_pmu_cpumask_init(void)
static void reset_global_refc(struct perf_event *event)
{
- mutex_lock(&imc_global_refc.lock);
+ spin_lock(&imc_global_refc.lock);
imc_global_refc.refc--;
/*
@@ -751,7 +749,7 @@ static void reset_global_refc(struct perf_event *event)
imc_global_refc.refc = 0;
imc_global_refc.id = 0;
}
- mutex_unlock(&imc_global_refc.lock);
+ spin_unlock(&imc_global_refc.lock);
}
static void core_imc_counters_release(struct perf_event *event)
@@ -764,17 +762,17 @@ static void core_imc_counters_release(struct perf_event *event)
/*
* See if we need to disable the IMC PMU.
* If no events are currently in use, then we have to take a
- * mutex to ensure that we don't race with another task doing
+ * lock to ensure that we don't race with another task doing
* enable or disable the core counters.
*/
core_id = event->cpu / threads_per_core;
- /* Take the mutex lock and decrement the refernce count for this core */
+ /* Take the lock and decrement the refernce count for this core */
ref = &core_imc_refc[core_id];
if (!ref)
return;
- mutex_lock(&ref->lock);
+ spin_lock(&ref->lock);
if (ref->refc == 0) {
/*
* The scenario where this is true is, when perf session is
@@ -786,7 +784,7 @@ static void core_imc_counters_release(struct perf_event *event)
* an OPAL call to disable the engine in that core.
*
*/
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
return;
}
ref->refc--;
@@ -794,7 +792,7 @@ static void core_imc_counters_release(struct perf_event *event)
rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
get_hard_smp_processor_id(event->cpu));
if (rc) {
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
pr_err("IMC: Unable to stop the counters for core %d\n", core_id);
return;
}
@@ -802,7 +800,7 @@ static void core_imc_counters_release(struct perf_event *event)
WARN(1, "core-imc: Invalid event reference count\n");
ref->refc = 0;
}
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
reset_global_refc(event);
}
@@ -840,7 +838,6 @@ static int core_imc_event_init(struct perf_event *event)
if ((!pcmi->vbase))
return -ENODEV;
- /* Get the core_imc mutex for this core */
ref = &core_imc_refc[core_id];
if (!ref)
return -EINVAL;
@@ -848,22 +845,22 @@ static int core_imc_event_init(struct perf_event *event)
/*
* Core pmu units are enabled only when it is used.
* See if this is triggered for the first time.
- * If yes, take the mutex lock and enable the core counters.
+ * If yes, take the lock and enable the core counters.
* If not, just increment the count in core_imc_refc struct.
*/
- mutex_lock(&ref->lock);
+ spin_lock(&ref->lock);
if (ref->refc == 0) {
rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE,
get_hard_smp_processor_id(event->cpu));
if (rc) {
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
pr_err("core-imc: Unable to start the counters for core %d\n",
core_id);
return rc;
}
}
++ref->refc;
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
/*
* Since the system can run either in accumulation or trace-mode
@@ -874,7 +871,7 @@ static int core_imc_event_init(struct perf_event *event)
* to know whether any other trace/thread imc
* events are running.
*/
- mutex_lock(&imc_global_refc.lock);
+ spin_lock(&imc_global_refc.lock);
if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_CORE) {
/*
* No other trace/thread imc events are running in
@@ -883,10 +880,10 @@ static int core_imc_event_init(struct perf_event *event)
imc_global_refc.id = IMC_DOMAIN_CORE;
imc_global_refc.refc++;
} else {
- mutex_unlock(&imc_global_refc.lock);
+ spin_unlock(&imc_global_refc.lock);
return -EBUSY;
}
- mutex_unlock(&imc_global_refc.lock);
+ spin_unlock(&imc_global_refc.lock);
event->hw.event_base = (u64)pcmi->vbase + (config & IMC_EVENT_OFFSET_MASK);
event->destroy = core_imc_counters_release;
@@ -958,10 +955,10 @@ static int ppc_thread_imc_cpu_offline(unsigned int cpu)
mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63))));
/* Reduce the refc if thread-imc event running on this cpu */
- mutex_lock(&imc_global_refc.lock);
+ spin_lock(&imc_global_refc.lock);
if (imc_global_refc.id == IMC_DOMAIN_THREAD)
imc_global_refc.refc--;
- mutex_unlock(&imc_global_refc.lock);
+ spin_unlock(&imc_global_refc.lock);
return 0;
}
@@ -1001,7 +998,7 @@ static int thread_imc_event_init(struct perf_event *event)
if (!target)
return -EINVAL;
- mutex_lock(&imc_global_refc.lock);
+ spin_lock(&imc_global_refc.lock);
/*
* Check if any other trace/core imc events are running in the
* system, if not set the global id to thread-imc.
@@ -1010,10 +1007,10 @@ static int thread_imc_event_init(struct perf_event *event)
imc_global_refc.id = IMC_DOMAIN_THREAD;
imc_global_refc.refc++;
} else {
- mutex_unlock(&imc_global_refc.lock);
+ spin_unlock(&imc_global_refc.lock);
return -EBUSY;
}
- mutex_unlock(&imc_global_refc.lock);
+ spin_unlock(&imc_global_refc.lock);
event->pmu->task_ctx_nr = perf_sw_context;
event->destroy = reset_global_refc;
@@ -1135,25 +1132,25 @@ static int thread_imc_event_add(struct perf_event *event, int flags)
/*
* imc pmus are enabled only when it is used.
* See if this is triggered for the first time.
- * If yes, take the mutex lock and enable the counters.
+ * If yes, take the lock and enable the counters.
* If not, just increment the count in ref count struct.
*/
ref = &core_imc_refc[core_id];
if (!ref)
return -EINVAL;
- mutex_lock(&ref->lock);
+ spin_lock(&ref->lock);
if (ref->refc == 0) {
if (opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE,
get_hard_smp_processor_id(smp_processor_id()))) {
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
pr_err("thread-imc: Unable to start the counter\
for core %d\n", core_id);
return -EINVAL;
}
}
++ref->refc;
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
return 0;
}
@@ -1170,12 +1167,12 @@ static void thread_imc_event_del(struct perf_event *event, int flags)
return;
}
- mutex_lock(&ref->lock);
+ spin_lock(&ref->lock);
ref->refc--;
if (ref->refc == 0) {
if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
get_hard_smp_processor_id(smp_processor_id()))) {
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
pr_err("thread-imc: Unable to stop the counters\
for core %d\n", core_id);
return;
@@ -1183,7 +1180,7 @@ static void thread_imc_event_del(struct perf_event *event, int flags)
} else if (ref->refc < 0) {
ref->refc = 0;
}
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
/* Set bit 0 of LDBAR to zero, to stop posting updates to memory */
mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63))));
@@ -1224,9 +1221,8 @@ static int trace_imc_mem_alloc(int cpu_id, int size)
}
}
- /* Init the mutex, if not already */
trace_imc_refc[core_id].id = core_id;
- mutex_init(&trace_imc_refc[core_id].lock);
+ spin_lock_init(&trace_imc_refc[core_id].lock);
mtspr(SPRN_LDBAR, 0);
return 0;
@@ -1246,10 +1242,10 @@ static int ppc_trace_imc_cpu_offline(unsigned int cpu)
* Reduce the refc if any trace-imc event running
* on this cpu.
*/
- mutex_lock(&imc_global_refc.lock);
+ spin_lock(&imc_global_refc.lock);
if (imc_global_refc.id == IMC_DOMAIN_TRACE)
imc_global_refc.refc--;
- mutex_unlock(&imc_global_refc.lock);
+ spin_unlock(&imc_global_refc.lock);
return 0;
}
@@ -1371,17 +1367,17 @@ static int trace_imc_event_add(struct perf_event *event, int flags)
}
mtspr(SPRN_LDBAR, ldbar_value);
- mutex_lock(&ref->lock);
+ spin_lock(&ref->lock);
if (ref->refc == 0) {
if (opal_imc_counters_start(OPAL_IMC_COUNTERS_TRACE,
get_hard_smp_processor_id(smp_processor_id()))) {
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
pr_err("trace-imc: Unable to start the counters for core %d\n", core_id);
return -EINVAL;
}
}
++ref->refc;
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
return 0;
}
@@ -1414,19 +1410,19 @@ static void trace_imc_event_del(struct perf_event *event, int flags)
return;
}
- mutex_lock(&ref->lock);
+ spin_lock(&ref->lock);
ref->refc--;
if (ref->refc == 0) {
if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_TRACE,
get_hard_smp_processor_id(smp_processor_id()))) {
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
pr_err("trace-imc: Unable to stop the counters for core %d\n", core_id);
return;
}
} else if (ref->refc < 0) {
ref->refc = 0;
}
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
trace_imc_event_stop(event, flags);
}
@@ -1448,7 +1444,7 @@ static int trace_imc_event_init(struct perf_event *event)
* no other thread is running any core/thread imc
* events
*/
- mutex_lock(&imc_global_refc.lock);
+ spin_lock(&imc_global_refc.lock);
if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_TRACE) {
/*
* No core/thread imc events are running in the
@@ -1457,10 +1453,10 @@ static int trace_imc_event_init(struct perf_event *event)
imc_global_refc.id = IMC_DOMAIN_TRACE;
imc_global_refc.refc++;
} else {
- mutex_unlock(&imc_global_refc.lock);
+ spin_unlock(&imc_global_refc.lock);
return -EBUSY;
}
- mutex_unlock(&imc_global_refc.lock);
+ spin_unlock(&imc_global_refc.lock);
event->hw.idx = -1;
@@ -1533,10 +1529,10 @@ static int init_nest_pmu_ref(void)
i = 0;
for_each_node(nid) {
/*
- * Mutex lock to avoid races while tracking the number of
+ * Take the lock to avoid races while tracking the number of
* sessions using the chip's nest pmu units.
*/
- mutex_init(&nest_imc_refc[i].lock);
+ spin_lock_init(&nest_imc_refc[i].lock);
/*
* Loop to init the "id" with the node_id. Variable "i" initialized to
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index dbcfe361831a..ea807aa0c31a 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -92,7 +92,7 @@ out:
}
static int
-spufs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+spufs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
@@ -100,7 +100,7 @@ spufs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
if ((attr->ia_valid & ATTR_SIZE) &&
(attr->ia_size != inode->i_size))
return -EINVAL;
- setattr_copy(&init_user_ns, inode, attr);
+ setattr_copy(&nop_mnt_idmap, inode, attr);
mark_inode_dirty(inode);
return 0;
}
@@ -237,7 +237,7 @@ spufs_mkdir(struct inode *dir, struct dentry *dentry, unsigned int flags,
if (!inode)
return -ENOSPC;
- inode_init_owner(&init_user_ns, inode, dir, mode | S_IFDIR);
+ inode_init_owner(&nop_mnt_idmap, inode, dir, mode | S_IFDIR);
ctx = alloc_spu_context(SPUFS_I(dir)->i_gang); /* XXX gang */
SPUFS_I(inode)->i_ctx = ctx;
if (!ctx) {
@@ -468,7 +468,7 @@ spufs_mkgang(struct inode *dir, struct dentry *dentry, umode_t mode)
goto out;
ret = 0;
- inode_init_owner(&init_user_ns, inode, dir, mode | S_IFDIR);
+ inode_init_owner(&nop_mnt_idmap, inode, dir, mode | S_IFDIR);
gang = alloc_spu_gang();
SPUFS_I(inode)->i_ctx = NULL;
SPUFS_I(inode)->i_gang = gang;
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index faf2c2177094..82153960ac00 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -80,6 +80,9 @@ ifeq ($(CONFIG_PERF_EVENTS),y)
KBUILD_CFLAGS += -fno-omit-frame-pointer
endif
+# Avoid generating .eh_frame sections.
+KBUILD_CFLAGS += -fno-asynchronous-unwind-tables -fno-unwind-tables
+
KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax)
KBUILD_AFLAGS_MODULE += $(call as-option,-Wa$(comma)-mno-relax)
diff --git a/arch/riscv/boot/dts/sifive/fu740-c000.dtsi b/arch/riscv/boot/dts/sifive/fu740-c000.dtsi
index 43bed6c0a84f..5235fd1c9cb6 100644
--- a/arch/riscv/boot/dts/sifive/fu740-c000.dtsi
+++ b/arch/riscv/boot/dts/sifive/fu740-c000.dtsi
@@ -328,7 +328,7 @@
bus-range = <0x0 0xff>;
ranges = <0x81000000 0x0 0x60080000 0x0 0x60080000 0x0 0x10000>, /* I/O */
<0x82000000 0x0 0x60090000 0x0 0x60090000 0x0 0xff70000>, /* mem */
- <0x82000000 0x0 0x70000000 0x0 0x70000000 0x0 0x1000000>, /* mem */
+ <0x82000000 0x0 0x70000000 0x0 0x70000000 0x0 0x10000000>, /* mem */
<0xc3000000 0x20 0x00000000 0x20 0x00000000 0x20 0x00000000>; /* mem prefetchable */
num-lanes = <0x8>;
interrupts = <56>, <57>, <58>, <59>, <60>, <61>, <62>, <63>, <64>;
diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h
index 7226e2462584..2c0f4c887289 100644
--- a/arch/riscv/include/asm/alternative-macros.h
+++ b/arch/riscv/include/asm/alternative-macros.h
@@ -46,7 +46,7 @@
.macro ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, enable_1, \
new_c_2, vendor_id_2, errata_id_2, enable_2
- ALTERNATIVE_CFG \old_c, \new_c_1, \vendor_id_1, \errata_id_1, \enable_1
+ ALTERNATIVE_CFG "\old_c", "\new_c_1", \vendor_id_1, \errata_id_1, \enable_1
ALT_NEW_CONTENT \vendor_id_2, \errata_id_2, \enable_2, \new_c_2
.endm
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index 86328e3acb02..64ad1937e714 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -70,7 +70,6 @@ static_assert(RISCV_ISA_EXT_ID_MAX <= RISCV_ISA_EXT_MAX);
*/
enum riscv_isa_ext_key {
RISCV_ISA_EXT_KEY_FPU, /* For 'F' and 'D' */
- RISCV_ISA_EXT_KEY_ZIHINTPAUSE,
RISCV_ISA_EXT_KEY_SVINVAL,
RISCV_ISA_EXT_KEY_MAX,
};
@@ -91,8 +90,6 @@ static __always_inline int riscv_isa_ext2key(int num)
return RISCV_ISA_EXT_KEY_FPU;
case RISCV_ISA_EXT_d:
return RISCV_ISA_EXT_KEY_FPU;
- case RISCV_ISA_EXT_ZIHINTPAUSE:
- return RISCV_ISA_EXT_KEY_ZIHINTPAUSE;
case RISCV_ISA_EXT_SVINVAL:
return RISCV_ISA_EXT_KEY_SVINVAL;
default:
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 4eba9a98d0e3..3e01f4f3ab08 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -721,6 +721,10 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd);
return __pmd(atomic_long_xchg((atomic_long_t *)pmdp, pmd_val(pmd)));
}
+
+#define pmdp_collapse_flush pmdp_collapse_flush
+extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h
index 855450bed9f5..ec0cab9fbddd 100644
--- a/arch/riscv/include/asm/uaccess.h
+++ b/arch/riscv/include/asm/uaccess.h
@@ -165,7 +165,7 @@ do { \
might_fault(); \
access_ok(__p, sizeof(*__p)) ? \
__get_user((x), __p) : \
- ((x) = 0, -EFAULT); \
+ ((x) = (__force __typeof__(x))0, -EFAULT); \
})
#define __put_user_asm(insn, x, ptr, err) \
diff --git a/arch/riscv/include/asm/vdso/processor.h b/arch/riscv/include/asm/vdso/processor.h
index fa70cfe507aa..14f5d27783b8 100644
--- a/arch/riscv/include/asm/vdso/processor.h
+++ b/arch/riscv/include/asm/vdso/processor.h
@@ -4,30 +4,26 @@
#ifndef __ASSEMBLY__
-#include <linux/jump_label.h>
#include <asm/barrier.h>
-#include <asm/hwcap.h>
static inline void cpu_relax(void)
{
- if (!static_branch_likely(&riscv_isa_ext_keys[RISCV_ISA_EXT_KEY_ZIHINTPAUSE])) {
#ifdef __riscv_muldiv
- int dummy;
- /* In lieu of a halt instruction, induce a long-latency stall. */
- __asm__ __volatile__ ("div %0, %0, zero" : "=r" (dummy));
+ int dummy;
+ /* In lieu of a halt instruction, induce a long-latency stall. */
+ __asm__ __volatile__ ("div %0, %0, zero" : "=r" (dummy));
#endif
- } else {
- /*
- * Reduce instruction retirement.
- * This assumes the PC changes.
- */
-#ifdef CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE
- __asm__ __volatile__ ("pause");
+
+#ifdef __riscv_zihintpause
+ /*
+ * Reduce instruction retirement.
+ * This assumes the PC changes.
+ */
+ __asm__ __volatile__ ("pause");
#else
- /* Encoding of the pause instruction */
- __asm__ __volatile__ (".4byte 0x100000F");
+ /* Encoding of the pause instruction */
+ __asm__ __volatile__ (".4byte 0x100000F");
#endif
- }
barrier();
}
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index b865046e4dbb..4bf6c449d78b 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -326,7 +326,7 @@ clear_bss_done:
call soc_early_init
tail start_kernel
-#if CONFIG_RISCV_BOOT_SPINWAIT
+#ifdef CONFIG_RISCV_BOOT_SPINWAIT
.Lsecondary_start:
/* Set trap vector to spin forever to help debug */
la a3, .Lsecondary_park
diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c
index f21592d20306..2bedec37d092 100644
--- a/arch/riscv/kernel/probes/kprobes.c
+++ b/arch/riscv/kernel/probes/kprobes.c
@@ -48,15 +48,35 @@ static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs)
post_kprobe_handler(p, kcb, regs);
}
+static bool __kprobes arch_check_kprobe(struct kprobe *p)
+{
+ unsigned long tmp = (unsigned long)p->addr - p->offset;
+ unsigned long addr = (unsigned long)p->addr;
+
+ while (tmp <= addr) {
+ if (tmp == addr)
+ return true;
+
+ tmp += GET_INSN_LENGTH(*(u16 *)tmp);
+ }
+
+ return false;
+}
+
int __kprobes arch_prepare_kprobe(struct kprobe *p)
{
- unsigned long probe_addr = (unsigned long)p->addr;
+ u16 *insn = (u16 *)p->addr;
+
+ if ((unsigned long)insn & 0x1)
+ return -EILSEQ;
- if (probe_addr & 0x1)
+ if (!arch_check_kprobe(p))
return -EILSEQ;
/* copy instruction */
- p->opcode = *p->addr;
+ p->opcode = (kprobe_opcode_t)(*insn++);
+ if (GET_INSN_LENGTH(p->opcode) == 4)
+ p->opcode |= (kprobe_opcode_t)(*insn) << 16;
/* decode instruction */
switch (riscv_probe_decode_insn(p->addr, &p->ainsn.api)) {
diff --git a/arch/riscv/kernel/probes/simulate-insn.c b/arch/riscv/kernel/probes/simulate-insn.c
index d73e96f6ed7c..a20568bd1f1a 100644
--- a/arch/riscv/kernel/probes/simulate-insn.c
+++ b/arch/riscv/kernel/probes/simulate-insn.c
@@ -71,11 +71,11 @@ bool __kprobes simulate_jalr(u32 opcode, unsigned long addr, struct pt_regs *reg
u32 rd_index = (opcode >> 7) & 0x1f;
u32 rs1_index = (opcode >> 15) & 0x1f;
- ret = rv_insn_reg_set_val(regs, rd_index, addr + 4);
+ ret = rv_insn_reg_get_val(regs, rs1_index, &base_addr);
if (!ret)
return ret;
- ret = rv_insn_reg_get_val(regs, rs1_index, &base_addr);
+ ret = rv_insn_reg_set_val(regs, rd_index, addr + 4);
if (!ret)
return ret;
diff --git a/arch/riscv/kernel/probes/simulate-insn.h b/arch/riscv/kernel/probes/simulate-insn.h
index cb6ff7dccb92..de8474146a9b 100644
--- a/arch/riscv/kernel/probes/simulate-insn.h
+++ b/arch/riscv/kernel/probes/simulate-insn.h
@@ -31,9 +31,9 @@ __RISCV_INSN_FUNCS(fence, 0x7f, 0x0f);
} while (0)
__RISCV_INSN_FUNCS(c_j, 0xe003, 0xa001);
-__RISCV_INSN_FUNCS(c_jr, 0xf007, 0x8002);
+__RISCV_INSN_FUNCS(c_jr, 0xf07f, 0x8002);
__RISCV_INSN_FUNCS(c_jal, 0xe003, 0x2001);
-__RISCV_INSN_FUNCS(c_jalr, 0xf007, 0x9002);
+__RISCV_INSN_FUNCS(c_jalr, 0xf07f, 0x9002);
__RISCV_INSN_FUNCS(c_beqz, 0xe003, 0xc001);
__RISCV_INSN_FUNCS(c_bnez, 0xe003, 0xe001);
__RISCV_INSN_FUNCS(c_ebreak, 0xffff, 0x9002);
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index 3373df413c88..ddb2afba6d25 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -39,7 +39,6 @@ static DECLARE_COMPLETION(cpu_running);
void __init smp_prepare_boot_cpu(void)
{
- init_cpu_topology();
}
void __init smp_prepare_cpus(unsigned int max_cpus)
@@ -48,6 +47,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
int ret;
unsigned int curr_cpuid;
+ init_cpu_topology();
+
curr_cpuid = smp_processor_id();
store_cpu_topology(curr_cpuid);
numa_store_cpu_info(curr_cpuid);
diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
index 75c8dd64fc48..f9a5a7c90ff0 100644
--- a/arch/riscv/kernel/stacktrace.c
+++ b/arch/riscv/kernel/stacktrace.c
@@ -32,6 +32,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
fp = (unsigned long)__builtin_frame_address(0);
sp = current_stack_pointer;
pc = (unsigned long)walk_stackframe;
+ level = -1;
} else {
/* task blocked in __switch_to */
fp = task->thread.s[0];
@@ -43,7 +44,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
unsigned long low, high;
struct stackframe *frame;
- if (unlikely(!__kernel_text_address(pc) || (level++ >= 1 && !fn(arg, pc))))
+ if (unlikely(!__kernel_text_address(pc) || (level++ >= 0 && !fn(arg, pc))))
break;
/* Validate frame pointer */
diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
index 3cc07ed45aeb..fcd6145fbead 100644
--- a/arch/riscv/mm/cacheflush.c
+++ b/arch/riscv/mm/cacheflush.c
@@ -90,8 +90,10 @@ void flush_icache_pte(pte_t pte)
if (PageHuge(page))
page = compound_head(page);
- if (!test_and_set_bit(PG_dcache_clean, &page->flags))
+ if (!test_bit(PG_dcache_clean, &page->flags)) {
flush_icache_all();
+ set_bit(PG_dcache_clean, &page->flags);
+ }
}
#endif /* CONFIG_MMU */
diff --git a/arch/riscv/mm/pgtable.c b/arch/riscv/mm/pgtable.c
index 6645ead1a7c1..fef4e7328e49 100644
--- a/arch/riscv/mm/pgtable.c
+++ b/arch/riscv/mm/pgtable.c
@@ -81,3 +81,23 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
}
#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
+{
+ pmd_t pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
+
+ VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+ VM_BUG_ON(pmd_trans_huge(*pmdp));
+ /*
+ * When leaf PTE entries (regular pages) are collapsed into a leaf
+ * PMD entry (huge page), a valid non-leaf PTE is converted into a
+ * valid leaf PTE at the level 1 page table. Since the sfence.vma
+ * forms that specify an address only apply to leaf PTEs, we need a
+ * global flush here. collapse_huge_page() assumes these flushes are
+ * eager, so just do the fence here.
+ */
+ flush_tlb_mm(vma->vm_mm);
+ return pmd;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/s390/boot/decompressor.c b/arch/s390/boot/decompressor.c
index e27c2140d620..b519a1f045d8 100644
--- a/arch/s390/boot/decompressor.c
+++ b/arch/s390/boot/decompressor.c
@@ -23,9 +23,9 @@
#define memmove memmove
#define memzero(s, n) memset((s), 0, (n))
-#ifdef CONFIG_KERNEL_BZIP2
+#if defined(CONFIG_KERNEL_BZIP2)
#define BOOT_HEAP_SIZE 0x400000
-#elif CONFIG_KERNEL_ZSTD
+#elif defined(CONFIG_KERNEL_ZSTD)
#define BOOT_HEAP_SIZE 0x30000
#else
#define BOOT_HEAP_SIZE 0x10000
@@ -80,6 +80,6 @@ void *decompress_kernel(void)
void *output = (void *)decompress_offset;
__decompress(_compressed_start, _compressed_end - _compressed_start,
- NULL, NULL, output, 0, NULL, error);
+ NULL, NULL, output, vmlinux.image_size, NULL, error);
return output;
}
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index a7b4e1d82758..74b35ec2ad28 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -190,7 +190,6 @@ CONFIG_NFT_CT=m
CONFIG_NFT_LOG=m
CONFIG_NFT_LIMIT=m
CONFIG_NFT_NAT=m
-CONFIG_NFT_OBJREF=m
CONFIG_NFT_REJECT=m
CONFIG_NFT_COMPAT=m
CONFIG_NFT_HASH=m
@@ -569,6 +568,7 @@ CONFIG_INPUT_EVDEV=y
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
CONFIG_LEGACY_PTY_COUNT=0
+# CONFIG_LEGACY_TIOCSTI is not set
CONFIG_VIRTIO_CONSOLE=m
CONFIG_HW_RANDOM_VIRTIO=m
CONFIG_HANGCHECK_TIMER=m
@@ -660,6 +660,7 @@ CONFIG_CONFIGFS_FS=m
CONFIG_ECRYPT_FS=m
CONFIG_CRAMFS=m
CONFIG_SQUASHFS=m
+CONFIG_SQUASHFS_CHOICE_DECOMP_BY_MOUNT=y
CONFIG_SQUASHFS_XATTR=y
CONFIG_SQUASHFS_LZ4=y
CONFIG_SQUASHFS_LZO=y
@@ -705,6 +706,7 @@ CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y
CONFIG_SECURITY_LANDLOCK=y
CONFIG_INTEGRITY_SIGNATURE=y
CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
+CONFIG_INTEGRITY_PLATFORM_KEYRING=y
CONFIG_IMA=y
CONFIG_IMA_DEFAULT_HASH_SHA256=y
CONFIG_IMA_WRITE_POLICY=y
@@ -781,6 +783,7 @@ CONFIG_ZCRYPT=m
CONFIG_PKEY=m
CONFIG_CRYPTO_PAES_S390=m
CONFIG_CRYPTO_DEV_VIRTIO=m
+CONFIG_SYSTEM_BLACKLIST_KEYRING=y
CONFIG_CORDIC=m
CONFIG_CRYPTO_LIB_CURVE25519=m
CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
@@ -848,7 +851,6 @@ CONFIG_PREEMPT_TRACER=y
CONFIG_SCHED_TRACER=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_BPF_KPROBE_OVERRIDE=y
CONFIG_HIST_TRIGGERS=y
CONFIG_FTRACE_STARTUP_TEST=y
# CONFIG_EVENT_TRACE_STARTUP_TEST is not set
@@ -870,7 +872,6 @@ CONFIG_FAIL_MAKE_REQUEST=y
CONFIG_FAIL_IO_TIMEOUT=y
CONFIG_FAIL_FUTEX=y
CONFIG_FAULT_INJECTION_DEBUG_FS=y
-CONFIG_FAIL_FUNCTION=y
CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y
CONFIG_LKDTM=m
CONFIG_TEST_MIN_HEAP=y
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 2bc2d0fe5774..cec71268e3bc 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -181,7 +181,6 @@ CONFIG_NFT_CT=m
CONFIG_NFT_LOG=m
CONFIG_NFT_LIMIT=m
CONFIG_NFT_NAT=m
-CONFIG_NFT_OBJREF=m
CONFIG_NFT_REJECT=m
CONFIG_NFT_COMPAT=m
CONFIG_NFT_HASH=m
@@ -559,6 +558,7 @@ CONFIG_INPUT_EVDEV=y
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
CONFIG_LEGACY_PTY_COUNT=0
+# CONFIG_LEGACY_TIOCSTI is not set
CONFIG_VIRTIO_CONSOLE=m
CONFIG_HW_RANDOM_VIRTIO=m
CONFIG_HANGCHECK_TIMER=m
@@ -645,6 +645,7 @@ CONFIG_CONFIGFS_FS=m
CONFIG_ECRYPT_FS=m
CONFIG_CRAMFS=m
CONFIG_SQUASHFS=m
+CONFIG_SQUASHFS_CHOICE_DECOMP_BY_MOUNT=y
CONFIG_SQUASHFS_XATTR=y
CONFIG_SQUASHFS_LZ4=y
CONFIG_SQUASHFS_LZO=y
@@ -688,6 +689,7 @@ CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y
CONFIG_SECURITY_LANDLOCK=y
CONFIG_INTEGRITY_SIGNATURE=y
CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
+CONFIG_INTEGRITY_PLATFORM_KEYRING=y
CONFIG_IMA=y
CONFIG_IMA_DEFAULT_HASH_SHA256=y
CONFIG_IMA_WRITE_POLICY=y
@@ -766,6 +768,7 @@ CONFIG_ZCRYPT=m
CONFIG_PKEY=m
CONFIG_CRYPTO_PAES_S390=m
CONFIG_CRYPTO_DEV_VIRTIO=m
+CONFIG_SYSTEM_BLACKLIST_KEYRING=y
CONFIG_CORDIC=m
CONFIG_PRIME_NUMBERS=m
CONFIG_CRYPTO_LIB_CURVE25519=m
@@ -798,7 +801,6 @@ CONFIG_STACK_TRACER=y
CONFIG_SCHED_TRACER=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_BPF_KPROBE_OVERRIDE=y
CONFIG_HIST_TRIGGERS=y
CONFIG_SAMPLES=y
CONFIG_SAMPLE_TRACE_PRINTK=m
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index ae14ab0b864d..a9c0c81d1de9 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -13,7 +13,6 @@ CONFIG_TUNE_ZEC12=y
# CONFIG_COMPAT is not set
CONFIG_NR_CPUS=2
CONFIG_HZ_100=y
-# CONFIG_RELOCATABLE is not set
# CONFIG_CHSC_SCH is not set
# CONFIG_SCM_BUS is not set
CONFIG_CRASH_DUMP=y
@@ -50,6 +49,7 @@ CONFIG_ZFCP=y
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
+# CONFIG_LEGACY_TIOCSTI is not set
# CONFIG_HVC_IUCV is not set
# CONFIG_HW_RANDOM_S390 is not set
# CONFIG_HMC_DRV is not set
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index feaba12dbecb..efa103b52a1a 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -131,19 +131,21 @@ struct hws_combined_entry {
struct hws_diag_entry diag; /* Diagnostic-sampling data entry */
} __packed;
-struct hws_trailer_entry {
- union {
- struct {
- unsigned int f:1; /* 0 - Block Full Indicator */
- unsigned int a:1; /* 1 - Alert request control */
- unsigned int t:1; /* 2 - Timestamp format */
- unsigned int :29; /* 3 - 31: Reserved */
- unsigned int bsdes:16; /* 32-47: size of basic SDE */
- unsigned int dsdes:16; /* 48-63: size of diagnostic SDE */
- };
- unsigned long long flags; /* 0 - 63: All indicators */
+union hws_trailer_header {
+ struct {
+ unsigned int f:1; /* 0 - Block Full Indicator */
+ unsigned int a:1; /* 1 - Alert request control */
+ unsigned int t:1; /* 2 - Timestamp format */
+ unsigned int :29; /* 3 - 31: Reserved */
+ unsigned int bsdes:16; /* 32-47: size of basic SDE */
+ unsigned int dsdes:16; /* 48-63: size of diagnostic SDE */
+ unsigned long long overflow; /* 64 - Overflow Count */
};
- unsigned long long overflow; /* 64 - sample Overflow count */
+ __uint128_t val;
+};
+
+struct hws_trailer_entry {
+ union hws_trailer_header header; /* 0 - 15 Flags + Overflow Count */
unsigned char timestamp[16]; /* 16 - 31 timestamp */
unsigned long long reserved1; /* 32 -Reserved */
unsigned long long reserved2; /* */
@@ -290,14 +292,11 @@ static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi,
return USEC_PER_SEC * qsi->cpu_speed / rate;
}
-#define SDB_TE_ALERT_REQ_MASK 0x4000000000000000UL
-#define SDB_TE_BUFFER_FULL_MASK 0x8000000000000000UL
-
/* Return TOD timestamp contained in an trailer entry */
static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te)
{
/* TOD in STCKE format */
- if (te->t)
+ if (te->header.t)
return *((unsigned long long *) &te->timestamp[1]);
/* TOD in STCK format */
diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h
index 77f24262c25c..ac665b9670c5 100644
--- a/arch/s390/include/asm/debug.h
+++ b/arch/s390/include/asm/debug.h
@@ -4,8 +4,8 @@
*
* Copyright IBM Corp. 1999, 2020
*/
-#ifndef DEBUG_H
-#define DEBUG_H
+#ifndef _ASM_S390_DEBUG_H
+#define _ASM_S390_DEBUG_H
#include <linux/string.h>
#include <linux/spinlock.h>
@@ -487,4 +487,4 @@ void debug_register_static(debug_info_t *id, int pages_per_area, int nr_areas);
#endif /* MODULE */
-#endif /* DEBUG_H */
+#endif /* _ASM_S390_DEBUG_H */
diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h
index cb5fc0690435..081837b391e3 100644
--- a/arch/s390/include/asm/percpu.h
+++ b/arch/s390/include/asm/percpu.h
@@ -31,7 +31,7 @@
pcp_op_T__ *ptr__; \
preempt_disable_notrace(); \
ptr__ = raw_cpu_ptr(&(pcp)); \
- prev__ = *ptr__; \
+ prev__ = READ_ONCE(*ptr__); \
do { \
old__ = prev__; \
new__ = old__ op (val); \
diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c
index fc6d5f58debe..2df94d32140c 100644
--- a/arch/s390/kernel/machine_kexec_file.c
+++ b/arch/s390/kernel/machine_kexec_file.c
@@ -187,8 +187,6 @@ static int kexec_file_add_ipl_report(struct kimage *image,
data->memsz = ALIGN(data->memsz, PAGE_SIZE);
buf.mem = data->memsz;
- if (image->type == KEXEC_TYPE_CRASH)
- buf.mem += crashk_res.start;
ptr = (void *)ipl_cert_list_addr;
end = ptr + ipl_cert_list_size;
@@ -225,6 +223,9 @@ static int kexec_file_add_ipl_report(struct kimage *image,
data->kernel_buf + offsetof(struct lowcore, ipl_parmblock_ptr);
*lc_ipl_parmblock_ptr = (__u32)buf.mem;
+ if (image->type == KEXEC_TYPE_CRASH)
+ buf.mem += crashk_res.start;
+
ret = kexec_add_buffer(&buf);
out:
return ret;
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 332a49965130..ce886a03545a 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -163,14 +163,15 @@ static void free_sampling_buffer(struct sf_buffer *sfb)
static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags)
{
- unsigned long sdb, *trailer;
+ struct hws_trailer_entry *te;
+ unsigned long sdb;
/* Allocate and initialize sample-data-block */
sdb = get_zeroed_page(gfp_flags);
if (!sdb)
return -ENOMEM;
- trailer = trailer_entry_ptr(sdb);
- *trailer = SDB_TE_ALERT_REQ_MASK;
+ te = (struct hws_trailer_entry *)trailer_entry_ptr(sdb);
+ te->header.a = 1;
/* Link SDB into the sample-data-block-table */
*sdbt = sdb;
@@ -1206,7 +1207,7 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
"%s: Found unknown"
" sampling data entry: te->f %i"
" basic.def %#4x (%p)\n", __func__,
- te->f, sample->def, sample);
+ te->header.f, sample->def, sample);
/* Sample slot is not yet written or other record.
*
* This condition can occur if the buffer was reused
@@ -1217,7 +1218,7 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
* that are not full. Stop processing if the first
* invalid format was detected.
*/
- if (!te->f)
+ if (!te->header.f)
break;
}
@@ -1227,6 +1228,16 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
}
}
+static inline __uint128_t __cdsg(__uint128_t *ptr, __uint128_t old, __uint128_t new)
+{
+ asm volatile(
+ " cdsg %[old],%[new],%[ptr]\n"
+ : [old] "+d" (old), [ptr] "+QS" (*ptr)
+ : [new] "d" (new)
+ : "memory", "cc");
+ return old;
+}
+
/* hw_perf_event_update() - Process sampling buffer
* @event: The perf event
* @flush_all: Flag to also flush partially filled sample-data-blocks
@@ -1243,10 +1254,11 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
*/
static void hw_perf_event_update(struct perf_event *event, int flush_all)
{
+ unsigned long long event_overflow, sampl_overflow, num_sdb;
+ union hws_trailer_header old, prev, new;
struct hw_perf_event *hwc = &event->hw;
struct hws_trailer_entry *te;
unsigned long *sdbt;
- unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags;
int done;
/*
@@ -1266,25 +1278,25 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
/* Leave loop if no more work to do (block full indicator) */
- if (!te->f) {
+ if (!te->header.f) {
done = 1;
if (!flush_all)
break;
}
/* Check the sample overflow count */
- if (te->overflow)
+ if (te->header.overflow)
/* Account sample overflows and, if a particular limit
* is reached, extend the sampling buffer.
* For details, see sfb_account_overflows().
*/
- sampl_overflow += te->overflow;
+ sampl_overflow += te->header.overflow;
/* Timestamps are valid for full sample-data-blocks only */
debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx "
"overflow %llu timestamp %#llx\n",
- __func__, (unsigned long)sdbt, te->overflow,
- (te->f) ? trailer_timestamp(te) : 0ULL);
+ __func__, (unsigned long)sdbt, te->header.overflow,
+ (te->header.f) ? trailer_timestamp(te) : 0ULL);
/* Collect all samples from a single sample-data-block and
* flag if an (perf) event overflow happened. If so, the PMU
@@ -1294,12 +1306,16 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
num_sdb++;
/* Reset trailer (using compare-double-and-swap) */
+ /* READ_ONCE() 16 byte header */
+ prev.val = __cdsg(&te->header.val, 0, 0);
do {
- te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK;
- te_flags |= SDB_TE_ALERT_REQ_MASK;
- } while (!cmpxchg_double(&te->flags, &te->overflow,
- te->flags, te->overflow,
- te_flags, 0ULL));
+ old.val = prev.val;
+ new.val = prev.val;
+ new.f = 0;
+ new.a = 1;
+ new.overflow = 0;
+ prev.val = __cdsg(&te->header.val, old.val, new.val);
+ } while (prev.val != old.val);
/* Advance to next sample-data-block */
sdbt++;
@@ -1384,7 +1400,7 @@ static void aux_output_end(struct perf_output_handle *handle)
range_scan = AUX_SDB_NUM_ALERT(aux);
for (i = 0, idx = aux->head; i < range_scan; i++, idx++) {
te = aux_sdb_trailer(aux, idx);
- if (!(te->flags & SDB_TE_BUFFER_FULL_MASK))
+ if (!te->header.f)
break;
}
/* i is num of SDBs which are full */
@@ -1392,7 +1408,7 @@ static void aux_output_end(struct perf_output_handle *handle)
/* Remove alert indicators in the buffer */
te = aux_sdb_trailer(aux, aux->alert_mark);
- te->flags &= ~SDB_TE_ALERT_REQ_MASK;
+ te->header.a = 0;
debug_sprintf_event(sfdbg, 6, "%s: SDBs %ld range %ld head %ld\n",
__func__, i, range_scan, aux->head);
@@ -1437,9 +1453,9 @@ static int aux_output_begin(struct perf_output_handle *handle,
idx = aux->empty_mark + 1;
for (i = 0; i < range_scan; i++, idx++) {
te = aux_sdb_trailer(aux, idx);
- te->flags &= ~(SDB_TE_BUFFER_FULL_MASK |
- SDB_TE_ALERT_REQ_MASK);
- te->overflow = 0;
+ te->header.f = 0;
+ te->header.a = 0;
+ te->header.overflow = 0;
}
/* Save the position of empty SDBs */
aux->empty_mark = aux->head + range - 1;
@@ -1448,7 +1464,7 @@ static int aux_output_begin(struct perf_output_handle *handle,
/* Set alert indicator */
aux->alert_mark = aux->head + range/2 - 1;
te = aux_sdb_trailer(aux, aux->alert_mark);
- te->flags = te->flags | SDB_TE_ALERT_REQ_MASK;
+ te->header.a = 1;
/* Reset hardware buffer head */
head = AUX_SDB_INDEX(aux, aux->head);
@@ -1475,14 +1491,17 @@ static int aux_output_begin(struct perf_output_handle *handle,
static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
unsigned long long *overflow)
{
- unsigned long long orig_overflow, orig_flags, new_flags;
+ union hws_trailer_header old, prev, new;
struct hws_trailer_entry *te;
te = aux_sdb_trailer(aux, alert_index);
+ /* READ_ONCE() 16 byte header */
+ prev.val = __cdsg(&te->header.val, 0, 0);
do {
- orig_flags = te->flags;
- *overflow = orig_overflow = te->overflow;
- if (orig_flags & SDB_TE_BUFFER_FULL_MASK) {
+ old.val = prev.val;
+ new.val = prev.val;
+ *overflow = old.overflow;
+ if (old.f) {
/*
* SDB is already set by hardware.
* Abort and try to set somewhere
@@ -1490,10 +1509,10 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
*/
return false;
}
- new_flags = orig_flags | SDB_TE_ALERT_REQ_MASK;
- } while (!cmpxchg_double(&te->flags, &te->overflow,
- orig_flags, orig_overflow,
- new_flags, 0ULL));
+ new.a = 1;
+ new.overflow = 0;
+ prev.val = __cdsg(&te->header.val, old.val, new.val);
+ } while (prev.val != old.val);
return true;
}
@@ -1522,8 +1541,9 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
unsigned long long *overflow)
{
- unsigned long long orig_overflow, orig_flags, new_flags;
unsigned long i, range_scan, idx, idx_old;
+ union hws_trailer_header old, prev, new;
+ unsigned long long orig_overflow;
struct hws_trailer_entry *te;
debug_sprintf_event(sfdbg, 6, "%s: range %ld head %ld alert %ld "
@@ -1554,17 +1574,20 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
idx_old = idx = aux->empty_mark + 1;
for (i = 0; i < range_scan; i++, idx++) {
te = aux_sdb_trailer(aux, idx);
+ /* READ_ONCE() 16 byte header */
+ prev.val = __cdsg(&te->header.val, 0, 0);
do {
- orig_flags = te->flags;
- orig_overflow = te->overflow;
- new_flags = orig_flags & ~SDB_TE_BUFFER_FULL_MASK;
+ old.val = prev.val;
+ new.val = prev.val;
+ orig_overflow = old.overflow;
+ new.f = 0;
+ new.overflow = 0;
if (idx == aux->alert_mark)
- new_flags |= SDB_TE_ALERT_REQ_MASK;
+ new.a = 1;
else
- new_flags &= ~SDB_TE_ALERT_REQ_MASK;
- } while (!cmpxchg_double(&te->flags, &te->overflow,
- orig_flags, orig_overflow,
- new_flags, 0ULL));
+ new.a = 0;
+ prev.val = __cdsg(&te->header.val, old.val, new.val);
+ } while (prev.val != old.val);
*overflow += orig_overflow;
}
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 2b6091349daa..696c9e007a36 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -508,6 +508,7 @@ static void __init setup_lowcore_dat_on(void)
{
struct lowcore *abs_lc;
unsigned long flags;
+ int i;
__ctl_clear_bit(0, 28);
S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT;
@@ -523,8 +524,8 @@ static void __init setup_lowcore_dat_on(void)
abs_lc = get_abs_lowcore(&flags);
abs_lc->restart_flags = RESTART_FLAG_CTLREGS;
abs_lc->program_new_psw = S390_lowcore.program_new_psw;
- memcpy(abs_lc->cregs_save_area, S390_lowcore.cregs_save_area,
- sizeof(abs_lc->cregs_save_area));
+ for (i = 0; i < 16; i++)
+ abs_lc->cregs_save_area[i] = S390_lowcore.cregs_save_area[i];
put_abs_lowcore(abs_lc, flags);
}
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 5ea3830af0cc..cbf9c1b0beda 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -17,6 +17,8 @@
/* Handle ro_after_init data on our own. */
#define RO_AFTER_INIT_DATA
+#define RUNTIME_DISCARD_EXIT
+
#define EMITS_PT_NOTE
#include <asm-generic/vmlinux.lds.h>
@@ -79,6 +81,7 @@ SECTIONS
_end_amode31_refs = .;
}
+ . = ALIGN(PAGE_SIZE);
_edata = .; /* End of data section */
/* will be freed after init */
@@ -193,6 +196,7 @@ SECTIONS
BSS_SECTION(PAGE_SIZE, 4 * PAGE_SIZE, PAGE_SIZE)
+ . = ALIGN(PAGE_SIZE);
_end = . ;
/*
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 1dae78deddf2..ab26aa53ee37 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -83,8 +83,9 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id)
struct esca_block *sca = vcpu->kvm->arch.sca;
union esca_sigp_ctrl *sigp_ctrl =
&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
- union esca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl;
+ union esca_sigp_ctrl new_val = {0}, old_val;
+ old_val = READ_ONCE(*sigp_ctrl);
new_val.scn = src_id;
new_val.c = 1;
old_val.c = 0;
@@ -95,8 +96,9 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id)
struct bsca_block *sca = vcpu->kvm->arch.sca;
union bsca_sigp_ctrl *sigp_ctrl =
&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
- union bsca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl;
+ union bsca_sigp_ctrl new_val = {0}, old_val;
+ old_val = READ_ONCE(*sigp_ctrl);
new_val.scn = src_id;
new_val.c = 1;
old_val.c = 0;
@@ -126,16 +128,18 @@ static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
struct esca_block *sca = vcpu->kvm->arch.sca;
union esca_sigp_ctrl *sigp_ctrl =
&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
- union esca_sigp_ctrl old = *sigp_ctrl;
+ union esca_sigp_ctrl old;
+ old = READ_ONCE(*sigp_ctrl);
expect = old.value;
rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
} else {
struct bsca_block *sca = vcpu->kvm->arch.sca;
union bsca_sigp_ctrl *sigp_ctrl =
&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
- union bsca_sigp_ctrl old = *sigp_ctrl;
+ union bsca_sigp_ctrl old;
+ old = READ_ONCE(*sigp_ctrl);
expect = old.value;
rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
}
diff --git a/arch/sh/include/asm/pgtable-3level.h b/arch/sh/include/asm/pgtable-3level.h
index a889a3a938ba..d1ce73f3bd85 100644
--- a/arch/sh/include/asm/pgtable-3level.h
+++ b/arch/sh/include/asm/pgtable-3level.h
@@ -28,7 +28,7 @@
#define pmd_ERROR(e) \
printk("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e))
-typedef struct {
+typedef union {
struct {
unsigned long pmd_low;
unsigned long pmd_high;
diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S
index 3161b9ccd2a5..b6276a3521d7 100644
--- a/arch/sh/kernel/vmlinux.lds.S
+++ b/arch/sh/kernel/vmlinux.lds.S
@@ -4,6 +4,7 @@
* Written by Niibe Yutaka and Paul Mundt
*/
OUTPUT_ARCH(sh)
+#define RUNTIME_DISCARD_EXIT
#include <asm/thread_info.h>
#include <asm/cache.h>
#include <asm/vmlinux.lds.h>
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 9cf07322875a..73ed982d4100 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -14,13 +14,13 @@ endif
ifdef CONFIG_CC_IS_GCC
RETPOLINE_CFLAGS := $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
-RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch-cs-prefix)
RETPOLINE_VDSO_CFLAGS := $(call cc-option,-mindirect-branch=thunk-inline -mindirect-branch-register)
endif
ifdef CONFIG_CC_IS_CLANG
RETPOLINE_CFLAGS := -mretpoline-external-thunk
RETPOLINE_VDSO_CFLAGS := -mretpoline
endif
+RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch-cs-prefix)
ifdef CONFIG_RETHUNK
RETHUNK_CFLAGS := -mfunction-return=thunk-extern
diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S
index 5521ea12f44e..aa9b96457584 100644
--- a/arch/x86/boot/bioscall.S
+++ b/arch/x86/boot/bioscall.S
@@ -32,7 +32,7 @@ intcall:
movw %dx, %si
movw %sp, %di
movw $11, %cx
- rep; movsd
+ rep; movsl
/* Pop full state from the stack */
popal
@@ -67,7 +67,7 @@ intcall:
jz 4f
movw %sp, %si
movw $11, %cx
- rep; movsd
+ rep; movsl
4: addw $44, %sp
/* Restore state and return */
diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c
index d4a314cc50d6..321a5011042d 100644
--- a/arch/x86/boot/compressed/ident_map_64.c
+++ b/arch/x86/boot/compressed/ident_map_64.c
@@ -180,6 +180,12 @@ void initialize_identity_maps(void *rmode)
/* Load the new page-table. */
write_cr3(top_level_pgt);
+
+ /*
+ * Now that the required page table mappings are established and a
+ * GHCB can be used, check for SNP guest/HV feature compatibility.
+ */
+ snp_check_features();
}
static pte_t *split_large_pmd(struct x86_mapping_info *info,
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 62208ec04ca4..20118fb7c53b 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -126,6 +126,7 @@ static inline void console_init(void)
#ifdef CONFIG_AMD_MEM_ENCRYPT
void sev_enable(struct boot_params *bp);
+void snp_check_features(void);
void sev_es_shutdown_ghcb(void);
extern bool sev_es_check_ghcb_fault(unsigned long address);
void snp_set_page_private(unsigned long paddr);
@@ -143,6 +144,7 @@ static inline void sev_enable(struct boot_params *bp)
if (bp)
bp->cc_blob_address = 0;
}
+static inline void snp_check_features(void) { }
static inline void sev_es_shutdown_ghcb(void) { }
static inline bool sev_es_check_ghcb_fault(unsigned long address)
{
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
index c93930d5ccbd..d63ad8f99f83 100644
--- a/arch/x86/boot/compressed/sev.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -208,6 +208,23 @@ void sev_es_shutdown_ghcb(void)
error("Can't unmap GHCB page");
}
+static void __noreturn sev_es_ghcb_terminate(struct ghcb *ghcb, unsigned int set,
+ unsigned int reason, u64 exit_info_2)
+{
+ u64 exit_info_1 = SVM_VMGEXIT_TERM_REASON(set, reason);
+
+ vc_ghcb_invalidate(ghcb);
+ ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_TERM_REQUEST);
+ ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
+ ghcb_set_sw_exit_info_2(ghcb, exit_info_2);
+
+ sev_es_wr_ghcb_msr(__pa(ghcb));
+ VMGEXIT();
+
+ while (true)
+ asm volatile("hlt\n" : : : "memory");
+}
+
bool sev_es_check_ghcb_fault(unsigned long address)
{
/* Check whether the fault was on the GHCB page */
@@ -270,6 +287,59 @@ static void enforce_vmpl0(void)
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_NOT_VMPL0);
}
+/*
+ * SNP_FEATURES_IMPL_REQ is the mask of SNP features that will need
+ * guest side implementation for proper functioning of the guest. If any
+ * of these features are enabled in the hypervisor but are lacking guest
+ * side implementation, the behavior of the guest will be undefined. The
+ * guest could fail in non-obvious way making it difficult to debug.
+ *
+ * As the behavior of reserved feature bits is unknown to be on the
+ * safe side add them to the required features mask.
+ */
+#define SNP_FEATURES_IMPL_REQ (MSR_AMD64_SNP_VTOM | \
+ MSR_AMD64_SNP_REFLECT_VC | \
+ MSR_AMD64_SNP_RESTRICTED_INJ | \
+ MSR_AMD64_SNP_ALT_INJ | \
+ MSR_AMD64_SNP_DEBUG_SWAP | \
+ MSR_AMD64_SNP_VMPL_SSS | \
+ MSR_AMD64_SNP_SECURE_TSC | \
+ MSR_AMD64_SNP_VMGEXIT_PARAM | \
+ MSR_AMD64_SNP_VMSA_REG_PROTECTION | \
+ MSR_AMD64_SNP_RESERVED_BIT13 | \
+ MSR_AMD64_SNP_RESERVED_BIT15 | \
+ MSR_AMD64_SNP_RESERVED_MASK)
+
+/*
+ * SNP_FEATURES_PRESENT is the mask of SNP features that are implemented
+ * by the guest kernel. As and when a new feature is implemented in the
+ * guest kernel, a corresponding bit should be added to the mask.
+ */
+#define SNP_FEATURES_PRESENT (0)
+
+void snp_check_features(void)
+{
+ u64 unsupported;
+
+ if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
+ return;
+
+ /*
+ * Terminate the boot if hypervisor has enabled any feature lacking
+ * guest side implementation. Pass on the unsupported features mask through
+ * EXIT_INFO_2 of the GHCB protocol so that those features can be reported
+ * as part of the guest boot failure.
+ */
+ unsupported = sev_status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT;
+ if (unsupported) {
+ if (ghcb_version < 2 || (!boot_ghcb && !early_setup_ghcb()))
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
+
+ sev_es_ghcb_terminate(boot_ghcb, SEV_TERM_SET_GEN,
+ GHCB_SNP_UNSUPPORTED, unsupported);
+ }
+}
+
void sev_enable(struct boot_params *bp)
{
unsigned int eax, ebx, ecx, edx;
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index cfd4c95b9f04..669d9e4f2901 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -386,8 +386,8 @@ static int handle_mmio(struct pt_regs *regs, struct ve_info *ve)
{
unsigned long *reg, val, vaddr;
char buffer[MAX_INSN_SIZE];
+ enum insn_mmio_type mmio;
struct insn insn = {};
- enum mmio_type mmio;
int size, extend_size;
u8 extend_val = 0;
@@ -402,10 +402,10 @@ static int handle_mmio(struct pt_regs *regs, struct ve_info *ve)
return -EINVAL;
mmio = insn_decode_mmio(&insn, &size);
- if (WARN_ON_ONCE(mmio == MMIO_DECODE_FAILED))
+ if (WARN_ON_ONCE(mmio == INSN_MMIO_DECODE_FAILED))
return -EINVAL;
- if (mmio != MMIO_WRITE_IMM && mmio != MMIO_MOVS) {
+ if (mmio != INSN_MMIO_WRITE_IMM && mmio != INSN_MMIO_MOVS) {
reg = insn_get_modrm_reg_ptr(&insn, regs);
if (!reg)
return -EINVAL;
@@ -426,23 +426,23 @@ static int handle_mmio(struct pt_regs *regs, struct ve_info *ve)
/* Handle writes first */
switch (mmio) {
- case MMIO_WRITE:
+ case INSN_MMIO_WRITE:
memcpy(&val, reg, size);
if (!mmio_write(size, ve->gpa, val))
return -EIO;
return insn.length;
- case MMIO_WRITE_IMM:
+ case INSN_MMIO_WRITE_IMM:
val = insn.immediate.value;
if (!mmio_write(size, ve->gpa, val))
return -EIO;
return insn.length;
- case MMIO_READ:
- case MMIO_READ_ZERO_EXTEND:
- case MMIO_READ_SIGN_EXTEND:
+ case INSN_MMIO_READ:
+ case INSN_MMIO_READ_ZERO_EXTEND:
+ case INSN_MMIO_READ_SIGN_EXTEND:
/* Reads are handled below */
break;
- case MMIO_MOVS:
- case MMIO_DECODE_FAILED:
+ case INSN_MMIO_MOVS:
+ case INSN_MMIO_DECODE_FAILED:
/*
* MMIO was accessed with an instruction that could not be
* decoded or handled properly. It was likely not using io.h
@@ -459,15 +459,15 @@ static int handle_mmio(struct pt_regs *regs, struct ve_info *ve)
return -EIO;
switch (mmio) {
- case MMIO_READ:
+ case INSN_MMIO_READ:
/* Zero-extend for 32-bit operation */
extend_size = size == 4 ? sizeof(*reg) : 0;
break;
- case MMIO_READ_ZERO_EXTEND:
+ case INSN_MMIO_READ_ZERO_EXTEND:
/* Zero extend based on operand size */
extend_size = insn.opnd_bytes;
break;
- case MMIO_READ_SIGN_EXTEND:
+ case INSN_MMIO_READ_SIGN_EXTEND:
/* Sign extend based on operand size */
extend_size = insn.opnd_bytes;
if (size == 1 && val & BIT(7))
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 85a63a41c471..d096b04bf80e 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2974,17 +2974,19 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
{
- if (!x86_pmu_initialized()) {
+ /* This API doesn't currently support enumerating hybrid PMUs. */
+ if (WARN_ON_ONCE(cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) ||
+ !x86_pmu_initialized()) {
memset(cap, 0, sizeof(*cap));
return;
}
- cap->version = x86_pmu.version;
/*
- * KVM doesn't support the hybrid PMU yet.
- * Return the common value in global x86_pmu,
- * which available for all cores.
+ * Note, hybrid CPU models get tracked as having hybrid PMUs even when
+ * all E-cores are disabled via BIOS. When E-cores are disabled, the
+ * base PMU holds the correct number of counters for P-cores.
*/
+ cap->version = x86_pmu.version;
cap->num_counters_gp = x86_pmu.num_counters;
cap->num_counters_fixed = x86_pmu.num_counters_fixed;
cap->bit_width_gp = x86_pmu.cntval_bits;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index dfd2c124cdf8..bafdc2be479a 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -6339,6 +6339,7 @@ __init int intel_pmu_init(void)
break;
case INTEL_FAM6_SAPPHIRERAPIDS_X:
+ case INTEL_FAM6_EMERALDRAPIDS_X:
pmem = true;
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, spr_hw_cache_event_ids, sizeof(hw_cache_event_ids));
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index a2834bc93149..551741e79e03 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -41,6 +41,7 @@
* MSR_CORE_C1_RES: CORE C1 Residency Counter
* perf code: 0x00
* Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL,RPL
+ * MTL
* Scope: Core (each processor core has a MSR)
* MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
* perf code: 0x01
@@ -51,50 +52,50 @@
* perf code: 0x02
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
* SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX,
- * TGL,TNT,RKL,ADL,RPL,SPR
+ * TGL,TNT,RKL,ADL,RPL,SPR,MTL
* Scope: Core
* MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
* perf code: 0x03
* Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML,
- * ICL,TGL,RKL,ADL,RPL
+ * ICL,TGL,RKL,ADL,RPL,MTL
* Scope: Core
* MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter.
* perf code: 0x00
* Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL,
* KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL,
- * RPL,SPR
+ * RPL,SPR,MTL
* Scope: Package (physical package)
* MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter.
* perf code: 0x01
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL,
* GLM,CNL,KBL,CML,ICL,TGL,TNT,RKL,
- * ADL,RPL
+ * ADL,RPL,MTL
* Scope: Package (physical package)
* MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter.
* perf code: 0x02
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
* SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX,
- * TGL,TNT,RKL,ADL,RPL,SPR
+ * TGL,TNT,RKL,ADL,RPL,SPR,MTL
* Scope: Package (physical package)
* MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter.
* perf code: 0x03
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL,
- * KBL,CML,ICL,TGL,RKL,ADL,RPL
+ * KBL,CML,ICL,TGL,RKL,ADL,RPL,MTL
* Scope: Package (physical package)
* MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter.
* perf code: 0x04
* Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL,
- * ADL,RPL
+ * ADL,RPL,MTL
* Scope: Package (physical package)
* MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter.
* perf code: 0x05
* Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL,
- * ADL,RPL
+ * ADL,RPL,MTL
* Scope: Package (physical package)
* MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
* perf code: 0x06
* Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL,
- * TNT,RKL,ADL,RPL
+ * TNT,RKL,ADL,RPL,MTL
* Scope: Package (physical package)
*
*/
@@ -676,6 +677,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &icx_cstates),
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &icx_cstates),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &icx_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &icx_cstates),
X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &icl_cstates),
X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &icl_cstates),
@@ -686,6 +688,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_cstates),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_cstates),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &adl_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &adl_cstates),
{ },
};
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 6f1ccc57a692..459b1aafd4d4 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1833,6 +1833,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &spr_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init),
{},
};
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index ecced3a52668..c65d8906cbcf 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -69,6 +69,7 @@ static bool test_intel(int idx, void *data)
case INTEL_FAM6_BROADWELL_G:
case INTEL_FAM6_BROADWELL_X:
case INTEL_FAM6_SAPPHIRERAPIDS_X:
+ case INTEL_FAM6_EMERALDRAPIDS_X:
case INTEL_FAM6_ATOM_SILVERMONT:
case INTEL_FAM6_ATOM_SILVERMONT_D:
@@ -107,6 +108,8 @@ static bool test_intel(int idx, void *data)
case INTEL_FAM6_RAPTORLAKE:
case INTEL_FAM6_RAPTORLAKE_P:
case INTEL_FAM6_RAPTORLAKE_S:
+ case INTEL_FAM6_METEORLAKE:
+ case INTEL_FAM6_METEORLAKE_L:
if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
return true;
break;
diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c
index a829492bca4c..52e6e7ed4f78 100644
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -800,13 +800,18 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &model_hsx),
X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &model_skl),
X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &model_skl),
+ X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &model_skl),
+ X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &model_skl),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &model_skl),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &model_skl),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &model_skl),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &model_spr),
+ X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &model_spr),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &model_skl),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &model_skl),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &model_skl),
+ X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &model_skl),
+ X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &model_skl),
{},
};
MODULE_DEVICE_TABLE(x86cpu, rapl_model_match);
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 65064d9f7fa6..8eb74cf386db 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -14,6 +14,7 @@
#include <asm/mmu.h>
#include <asm/mpspec.h>
#include <asm/x86_init.h>
+#include <asm/cpufeature.h>
#ifdef CONFIG_ACPI_APEI
# include <asm/pgtable_types.h>
@@ -63,6 +64,13 @@ extern int (*acpi_suspend_lowlevel)(void);
/* Physical address to resume after wakeup */
unsigned long acpi_get_wakeup_address(void);
+static inline bool acpi_skip_set_wakeup_address(void)
+{
+ return cpu_feature_enabled(X86_FEATURE_XENPV);
+}
+
+#define acpi_skip_set_wakeup_address acpi_skip_set_wakeup_address
+
/*
* Check if the CPU can handle C2 and deeper
*/
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 61012476d66e..8f39c46197b8 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -466,5 +466,6 @@
#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */
#define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */
#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
+#define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index b049d950612f..ca97442e8d49 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -39,7 +39,20 @@ static __always_inline unsigned long native_get_debugreg(int regno)
asm("mov %%db6, %0" :"=r" (val));
break;
case 7:
- asm("mov %%db7, %0" :"=r" (val));
+ /*
+ * Apply __FORCE_ORDER to DR7 reads to forbid re-ordering them
+ * with other code.
+ *
+ * This is needed because a DR7 access can cause a #VC exception
+ * when running under SEV-ES. Taking a #VC exception is not a
+ * safe thing to do just anywhere in the entry code and
+ * re-ordering might place the access into an unsafe location.
+ *
+ * This happened in the NMI handler, where the DR7 read was
+ * re-ordered to happen before the call to sev_es_ist_enter(),
+ * causing stack recursion.
+ */
+ asm volatile("mov %%db7, %0" : "=r" (val) : __FORCE_ORDER);
break;
default:
BUG();
@@ -66,7 +79,16 @@ static __always_inline void native_set_debugreg(int regno, unsigned long value)
asm("mov %0, %%db6" ::"r" (value));
break;
case 7:
- asm("mov %0, %%db7" ::"r" (value));
+ /*
+ * Apply __FORCE_ORDER to DR7 writes to forbid re-ordering them
+ * with other code.
+ *
+ * While is didn't happen with a DR7 write (see the DR7 read
+ * comment above which explains where it happened), add the
+ * __FORCE_ORDER here too to avoid similar problems in the
+ * future.
+ */
+ asm volatile("mov %0, %%db7" ::"r" (value), __FORCE_ORDER);
break;
default:
BUG();
diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h
index f07faa61c7f3..54368a43abf6 100644
--- a/arch/x86/include/asm/insn-eval.h
+++ b/arch/x86/include/asm/insn-eval.h
@@ -32,16 +32,16 @@ int insn_fetch_from_user_inatomic(struct pt_regs *regs,
bool insn_decode_from_regs(struct insn *insn, struct pt_regs *regs,
unsigned char buf[MAX_INSN_SIZE], int buf_size);
-enum mmio_type {
- MMIO_DECODE_FAILED,
- MMIO_WRITE,
- MMIO_WRITE_IMM,
- MMIO_READ,
- MMIO_READ_ZERO_EXTEND,
- MMIO_READ_SIGN_EXTEND,
- MMIO_MOVS,
+enum insn_mmio_type {
+ INSN_MMIO_DECODE_FAILED,
+ INSN_MMIO_WRITE,
+ INSN_MMIO_WRITE_IMM,
+ INSN_MMIO_READ,
+ INSN_MMIO_READ_ZERO_EXTEND,
+ INSN_MMIO_READ_SIGN_EXTEND,
+ INSN_MMIO_MOVS,
};
-enum mmio_type insn_decode_mmio(struct insn *insn, int *bytes);
+enum insn_mmio_type insn_decode_mmio(struct insn *insn, int *bytes);
#endif /* _ASM_X86_INSN_EVAL_H */
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 347707d459c6..cbaf174d8efd 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -123,6 +123,8 @@
#define INTEL_FAM6_METEORLAKE 0xAC
#define INTEL_FAM6_METEORLAKE_L 0xAA
+#define INTEL_FAM6_LUNARLAKE_M 0xBD
+
/* "Small Core" Processors (Atom/E-Core) */
#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f35f1ff4427b..6aaae18f1854 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1111,6 +1111,7 @@ struct msr_bitmap_range {
/* Xen emulation context */
struct kvm_xen {
+ struct mutex xen_lock;
u32 xen_version;
bool long_mode;
bool runstate_update_flag;
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 37ff47552bcb..d3fe82c5d6b6 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -566,6 +566,26 @@
#define MSR_AMD64_SEV_ES_ENABLED BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT)
#define MSR_AMD64_SEV_SNP_ENABLED BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT)
+/* SNP feature bits enabled by the hypervisor */
+#define MSR_AMD64_SNP_VTOM BIT_ULL(3)
+#define MSR_AMD64_SNP_REFLECT_VC BIT_ULL(4)
+#define MSR_AMD64_SNP_RESTRICTED_INJ BIT_ULL(5)
+#define MSR_AMD64_SNP_ALT_INJ BIT_ULL(6)
+#define MSR_AMD64_SNP_DEBUG_SWAP BIT_ULL(7)
+#define MSR_AMD64_SNP_PREVENT_HOST_IBS BIT_ULL(8)
+#define MSR_AMD64_SNP_BTB_ISOLATION BIT_ULL(9)
+#define MSR_AMD64_SNP_VMPL_SSS BIT_ULL(10)
+#define MSR_AMD64_SNP_SECURE_TSC BIT_ULL(11)
+#define MSR_AMD64_SNP_VMGEXIT_PARAM BIT_ULL(12)
+#define MSR_AMD64_SNP_IBS_VIRT BIT_ULL(14)
+#define MSR_AMD64_SNP_VMSA_REG_PROTECTION BIT_ULL(16)
+#define MSR_AMD64_SNP_SMT_PROTECTION BIT_ULL(17)
+
+/* SNP feature bits reserved for future use. */
+#define MSR_AMD64_SNP_RESERVED_BIT13 BIT_ULL(13)
+#define MSR_AMD64_SNP_RESERVED_BIT15 BIT_ULL(15)
+#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, 18)
+
#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
/* AMD Collaborative Processor Performance Control MSRs */
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index f69c168391aa..80e1df482337 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -116,6 +116,12 @@
#define SVM_VMGEXIT_AP_CREATE 1
#define SVM_VMGEXIT_AP_DESTROY 2
#define SVM_VMGEXIT_HV_FEATURES 0x8000fffd
+#define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe
+#define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \
+ /* SW_EXITINFO1[3:0] */ \
+ (((((u64)reason_set) & 0xf)) | \
+ /* SW_EXITINFO1[11:4] */ \
+ ((((u64)reason_code) & 0xff) << 4))
#define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffff
/* Exit code reserved for hypervisor/software use */
diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c
index 1f60a2b27936..fdbb5f07448f 100644
--- a/arch/x86/kernel/cpu/aperfmperf.c
+++ b/arch/x86/kernel/cpu/aperfmperf.c
@@ -330,7 +330,16 @@ static void __init bp_init_freq_invariance(void)
static void disable_freq_invariance_workfn(struct work_struct *work)
{
+ int cpu;
+
static_branch_disable(&arch_scale_freq_key);
+
+ /*
+ * Set arch_freq_scale to a default value on all cpus
+ * This negates the effect of scaling
+ */
+ for_each_possible_cpu(cpu)
+ per_cpu(arch_freq_scale, cpu) = SCHED_CAPACITY_SCALE;
}
static DECLARE_WORK(disable_freq_invariance_work,
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index d970ddb0cc65..bca0bd8f4846 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1981,6 +1981,8 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
if (ctrl == PR_SPEC_FORCE_DISABLE)
task_set_spec_ib_force_disable(task);
task_update_spec_tif(task);
+ if (task == current)
+ indirect_branch_prediction_barrier();
break;
default:
return -ERANGE;
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 9cfca3d7d0e2..f3cc7699e1e1 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1256,6 +1256,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
#define MMIO_SBDS BIT(2)
/* CPU is affected by RETbleed, speculating where you would not expect it */
#define RETBLEED BIT(3)
+/* CPU is affected by SMT (cross-thread) return predictions */
+#define SMT_RSB BIT(4)
static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS),
@@ -1287,8 +1289,8 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_AMD(0x15, RETBLEED),
VULNBL_AMD(0x16, RETBLEED),
- VULNBL_AMD(0x17, RETBLEED),
- VULNBL_HYGON(0x18, RETBLEED),
+ VULNBL_AMD(0x17, RETBLEED | SMT_RSB),
+ VULNBL_HYGON(0x18, RETBLEED | SMT_RSB),
{}
};
@@ -1406,6 +1408,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
!(ia32_cap & ARCH_CAP_PBRSB_NO))
setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB);
+ if (cpu_matches(cpu_vuln_blacklist, SMT_RSB))
+ setup_force_cpu_bug(X86_BUG_SMT_RSB);
+
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index efe0c30d3a12..77538abeb72a 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -146,6 +146,30 @@ static inline struct rmid_entry *__rmid_entry(u32 rmid)
return entry;
}
+static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val)
+{
+ u64 msr_val;
+
+ /*
+ * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured
+ * with a valid event code for supported resource type and the bits
+ * IA32_QM_EVTSEL.RMID (bits 41:32) are configured with valid RMID,
+ * IA32_QM_CTR.data (bits 61:0) reports the monitored data.
+ * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62)
+ * are error bits.
+ */
+ wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
+ rdmsrl(MSR_IA32_QM_CTR, msr_val);
+
+ if (msr_val & RMID_VAL_ERROR)
+ return -EIO;
+ if (msr_val & RMID_VAL_UNAVAIL)
+ return -EINVAL;
+
+ *val = msr_val;
+ return 0;
+}
+
static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom,
u32 rmid,
enum resctrl_event_id eventid)
@@ -172,8 +196,12 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
struct arch_mbm_state *am;
am = get_arch_mbm_state(hw_dom, rmid, eventid);
- if (am)
+ if (am) {
memset(am, 0, sizeof(*am));
+
+ /* Record any initial, non-zero count value. */
+ __rmid_read(rmid, eventid, &am->prev_msr);
+ }
}
static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
@@ -191,25 +219,14 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
struct arch_mbm_state *am;
u64 msr_val, chunks;
+ int ret;
if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask))
return -EINVAL;
- /*
- * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured
- * with a valid event code for supported resource type and the bits
- * IA32_QM_EVTSEL.RMID (bits 41:32) are configured with valid RMID,
- * IA32_QM_CTR.data (bits 61:0) reports the monitored data.
- * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62)
- * are error bits.
- */
- wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
- rdmsrl(MSR_IA32_QM_CTR, msr_val);
-
- if (msr_val & RMID_VAL_ERROR)
- return -EIO;
- if (msr_val & RMID_VAL_UNAVAIL)
- return -EINVAL;
+ ret = __rmid_read(rmid, eventid, &msr_val);
+ if (ret)
+ return ret;
am = get_arch_mbm_state(hw_dom, rmid, eventid);
if (am) {
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index e5a48f05e787..5993da21d822 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -580,8 +580,10 @@ static int __rdtgroup_move_task(struct task_struct *tsk,
/*
* Ensure the task's closid and rmid are written before determining if
* the task is current that will decide if it will be interrupted.
+ * This pairs with the full barrier between the rq->curr update and
+ * resctrl_sched_in() during context switch.
*/
- barrier();
+ smp_mb();
/*
* By now, the task's closid and rmid are set. If the task is current
@@ -2402,6 +2404,14 @@ static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
WRITE_ONCE(t->rmid, to->mon.rmid);
/*
+ * Order the closid/rmid stores above before the loads
+ * in task_curr(). This pairs with the full barrier
+ * between the rq->curr update and resctrl_sched_in()
+ * during context switch.
+ */
+ smp_mb();
+
+ /*
* If the task is on a CPU, set the CPU in the mask.
* The detection is inaccurate as tasks might move or
* schedule before the smp function call takes place.
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 9730c88530fc..305514431f26 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -401,10 +401,8 @@ int crash_load_segments(struct kimage *image)
kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
ret = kexec_add_buffer(&kbuf);
- if (ret) {
- vfree((void *)image->elf_headers);
+ if (ret)
return ret;
- }
image->elf_load_addr = kbuf.mem;
pr_debug("Loaded ELF headers at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
image->elf_load_addr, kbuf.bufsz, kbuf.memsz);
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 3aa5304200c5..4d8aff05a509 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -114,6 +114,7 @@ static void make_8259A_irq(unsigned int irq)
disable_irq_nosync(irq);
io_apic_irqs &= ~(1<<irq);
irq_set_chip_and_handler(irq, &i8259A_chip, handle_level_irq);
+ irq_set_status_flags(irq, IRQ_LEVEL);
enable_irq(irq);
lapic_assign_legacy_vector(irq, true);
}
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index beb1bada1b0a..c683666876f1 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -65,8 +65,10 @@ void __init init_ISA_irqs(void)
legacy_pic->init(0);
- for (i = 0; i < nr_legacy_irqs(); i++)
+ for (i = 0; i < nr_legacy_irqs(); i++) {
irq_set_chip_and_handler(i, chip, handle_level_irq);
+ irq_set_status_flags(i, IRQ_LEVEL);
+ }
}
void __init init_IRQ(void)
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index b36f3c367cb2..695873c0f50b 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -625,7 +625,7 @@ static int prepare_emulation(struct kprobe *p, struct insn *insn)
/* 1 byte conditional jump */
p->ainsn.emulate_op = kprobe_emulate_jcc;
p->ainsn.jcc.type = opcode & 0xf;
- p->ainsn.rel32 = *(char *)insn->immediate.bytes;
+ p->ainsn.rel32 = insn->immediate.value;
break;
case 0x0f:
opcode = insn->opcode.bytes[1];
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index a428c62330d3..679026a640ef 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -1536,32 +1536,32 @@ static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt,
static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
struct insn *insn = &ctxt->insn;
+ enum insn_mmio_type mmio;
unsigned int bytes = 0;
- enum mmio_type mmio;
enum es_result ret;
u8 sign_byte;
long *reg_data;
mmio = insn_decode_mmio(insn, &bytes);
- if (mmio == MMIO_DECODE_FAILED)
+ if (mmio == INSN_MMIO_DECODE_FAILED)
return ES_DECODE_FAILED;
- if (mmio != MMIO_WRITE_IMM && mmio != MMIO_MOVS) {
+ if (mmio != INSN_MMIO_WRITE_IMM && mmio != INSN_MMIO_MOVS) {
reg_data = insn_get_modrm_reg_ptr(insn, ctxt->regs);
if (!reg_data)
return ES_DECODE_FAILED;
}
switch (mmio) {
- case MMIO_WRITE:
+ case INSN_MMIO_WRITE:
memcpy(ghcb->shared_buffer, reg_data, bytes);
ret = vc_do_mmio(ghcb, ctxt, bytes, false);
break;
- case MMIO_WRITE_IMM:
+ case INSN_MMIO_WRITE_IMM:
memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes);
ret = vc_do_mmio(ghcb, ctxt, bytes, false);
break;
- case MMIO_READ:
+ case INSN_MMIO_READ:
ret = vc_do_mmio(ghcb, ctxt, bytes, true);
if (ret)
break;
@@ -1572,7 +1572,7 @@ static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
memcpy(reg_data, ghcb->shared_buffer, bytes);
break;
- case MMIO_READ_ZERO_EXTEND:
+ case INSN_MMIO_READ_ZERO_EXTEND:
ret = vc_do_mmio(ghcb, ctxt, bytes, true);
if (ret)
break;
@@ -1581,7 +1581,7 @@ static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
memset(reg_data, 0, insn->opnd_bytes);
memcpy(reg_data, ghcb->shared_buffer, bytes);
break;
- case MMIO_READ_SIGN_EXTEND:
+ case INSN_MMIO_READ_SIGN_EXTEND:
ret = vc_do_mmio(ghcb, ctxt, bytes, true);
if (ret)
break;
@@ -1600,7 +1600,7 @@ static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
memset(reg_data, sign_byte, insn->opnd_bytes);
memcpy(reg_data, ghcb->shared_buffer, bytes);
break;
- case MMIO_MOVS:
+ case INSN_MMIO_MOVS:
ret = vc_handle_mmio_movs(ctxt, bytes);
break;
default:
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index b14653b61470..596061c1610e 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -770,16 +770,22 @@ struct kvm_cpuid_array {
int nent;
};
+static struct kvm_cpuid_entry2 *get_next_cpuid(struct kvm_cpuid_array *array)
+{
+ if (array->nent >= array->maxnent)
+ return NULL;
+
+ return &array->entries[array->nent++];
+}
+
static struct kvm_cpuid_entry2 *do_host_cpuid(struct kvm_cpuid_array *array,
u32 function, u32 index)
{
- struct kvm_cpuid_entry2 *entry;
+ struct kvm_cpuid_entry2 *entry = get_next_cpuid(array);
- if (array->nent >= array->maxnent)
+ if (!entry)
return NULL;
- entry = &array->entries[array->nent++];
-
memset(entry, 0, sizeof(*entry));
entry->function = function;
entry->index = index;
@@ -956,22 +962,13 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->edx = edx.full;
break;
}
- /*
- * Per Intel's SDM, the 0x1f is a superset of 0xb,
- * thus they can be handled by common code.
- */
case 0x1f:
case 0xb:
/*
- * Populate entries until the level type (ECX[15:8]) of the
- * previous entry is zero. Note, CPUID EAX.{0x1f,0xb}.0 is
- * the starting entry, filled by the primary do_host_cpuid().
+ * No topology; a valid topology is indicated by the presence
+ * of subleaf 1.
*/
- for (i = 1; entry->ecx & 0xff00; ++i) {
- entry = do_host_cpuid(array, function, i);
- if (!entry)
- goto out;
- }
+ entry->eax = entry->ebx = entry->ecx = 0;
break;
case 0xd: {
u64 permitted_xcr0 = kvm_caps.supported_xcr0 & xstate_get_guest_group_perm();
@@ -1202,6 +1199,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->ebx = entry->ecx = entry->edx = 0;
break;
case 0x8000001e:
+ /* Do not return host topology information. */
+ entry->eax = entry->ebx = entry->ecx = 0;
+ entry->edx = 0; /* reserved */
break;
case 0x8000001F:
if (!kvm_cpu_cap_has(X86_FEATURE_SEV)) {
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index cdb91009701d..ee67ba625094 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -165,15 +165,27 @@ static inline void kvm_init_pmu_capability(void)
{
bool is_intel = boot_cpu_data.x86_vendor == X86_VENDOR_INTEL;
- perf_get_x86_pmu_capability(&kvm_pmu_cap);
-
- /*
- * For Intel, only support guest architectural pmu
- * on a host with architectural pmu.
- */
- if ((is_intel && !kvm_pmu_cap.version) || !kvm_pmu_cap.num_counters_gp)
+ /*
+ * Hybrid PMUs don't play nice with virtualization without careful
+ * configuration by userspace, and KVM's APIs for reporting supported
+ * vPMU features do not account for hybrid PMUs. Disable vPMU support
+ * for hybrid PMUs until KVM gains a way to let userspace opt-in.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
enable_pmu = false;
+ if (enable_pmu) {
+ perf_get_x86_pmu_capability(&kvm_pmu_cap);
+
+ /*
+ * For Intel, only support guest architectural pmu
+ * on a host with architectural pmu.
+ */
+ if ((is_intel && !kvm_pmu_cap.version) ||
+ !kvm_pmu_cap.num_counters_gp)
+ enable_pmu = false;
+ }
+
if (!enable_pmu) {
memset(&kvm_pmu_cap, 0, sizeof(kvm_pmu_cap));
return;
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index bc9cd7086fa9..add65dd59756 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -138,15 +138,13 @@ void recalc_intercepts(struct vcpu_svm *svm)
c->intercepts[i] = h->intercepts[i];
if (g->int_ctl & V_INTR_MASKING_MASK) {
- /* We only want the cr8 intercept bits of L1 */
- vmcb_clr_intercept(c, INTERCEPT_CR8_READ);
- vmcb_clr_intercept(c, INTERCEPT_CR8_WRITE);
-
/*
- * Once running L2 with HF_VINTR_MASK, EFLAGS.IF does not
- * affect any interrupt we may want to inject; therefore,
- * interrupt window vmexits are irrelevant to L0.
+ * Once running L2 with HF_VINTR_MASK, EFLAGS.IF and CR8
+ * does not affect any interrupt we may want to inject;
+ * therefore, writes to CR8 are irrelevant to L0, as are
+ * interrupt window vmexits.
*/
+ vmcb_clr_intercept(c, INTERCEPT_CR8_WRITE);
vmcb_clr_intercept(c, INTERCEPT_VINTR);
}
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index fc9008dbed33..7eec0226d56a 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3440,18 +3440,15 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var)
{
u32 ar;
- if (var->unusable || !var->present)
- ar = 1 << 16;
- else {
- ar = var->type & 15;
- ar |= (var->s & 1) << 4;
- ar |= (var->dpl & 3) << 5;
- ar |= (var->present & 1) << 7;
- ar |= (var->avl & 1) << 12;
- ar |= (var->l & 1) << 13;
- ar |= (var->db & 1) << 14;
- ar |= (var->g & 1) << 15;
- }
+ ar = var->type & 15;
+ ar |= (var->s & 1) << 4;
+ ar |= (var->dpl & 3) << 5;
+ ar |= (var->present & 1) << 7;
+ ar |= (var->avl & 1) << 12;
+ ar |= (var->l & 1) << 13;
+ ar |= (var->db & 1) << 14;
+ ar |= (var->g & 1) << 15;
+ ar |= (var->unusable || !var->present) << 16;
return ar;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index da4bbd043a7b..a2c299d47e69 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -191,6 +191,10 @@ module_param(enable_pmu, bool, 0444);
bool __read_mostly eager_page_split = true;
module_param(eager_page_split, bool, 0644);
+/* Enable/disable SMT_RSB bug mitigation */
+bool __read_mostly mitigate_smt_rsb;
+module_param(mitigate_smt_rsb, bool, 0444);
+
/*
* Restoring the host value for MSRs that are only consumed when running in
* usermode, e.g. SYSCALL MSRs and TSC_AUX, can be deferred until the CPU
@@ -4448,10 +4452,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = KVM_CLOCK_VALID_FLAGS;
break;
case KVM_CAP_X86_DISABLE_EXITS:
- r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE |
- KVM_X86_DISABLE_EXITS_CSTATE;
- if(kvm_can_mwait_in_guest())
- r |= KVM_X86_DISABLE_EXITS_MWAIT;
+ r = KVM_X86_DISABLE_EXITS_PAUSE;
+
+ if (!mitigate_smt_rsb) {
+ r |= KVM_X86_DISABLE_EXITS_HLT |
+ KVM_X86_DISABLE_EXITS_CSTATE;
+
+ if (kvm_can_mwait_in_guest())
+ r |= KVM_X86_DISABLE_EXITS_MWAIT;
+ }
break;
case KVM_CAP_X86_SMM:
if (!IS_ENABLED(CONFIG_KVM_SMM))
@@ -5254,12 +5263,11 @@ static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
{
unsigned long val;
+ memset(dbgregs, 0, sizeof(*dbgregs));
memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
kvm_get_dr(vcpu, 6, &val);
dbgregs->dr6 = val;
dbgregs->dr7 = vcpu->arch.dr7;
- dbgregs->flags = 0;
- memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
}
static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
@@ -6227,15 +6235,26 @@ split_irqchip_unlock:
if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS)
break;
- if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) &&
- kvm_can_mwait_in_guest())
- kvm->arch.mwait_in_guest = true;
- if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT)
- kvm->arch.hlt_in_guest = true;
if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE)
kvm->arch.pause_in_guest = true;
- if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE)
- kvm->arch.cstate_in_guest = true;
+
+#define SMT_RSB_MSG "This processor is affected by the Cross-Thread Return Predictions vulnerability. " \
+ "KVM_CAP_X86_DISABLE_EXITS should only be used with SMT disabled or trusted guests."
+
+ if (!mitigate_smt_rsb) {
+ if (boot_cpu_has_bug(X86_BUG_SMT_RSB) && cpu_smt_possible() &&
+ (cap->args[0] & ~KVM_X86_DISABLE_EXITS_PAUSE))
+ pr_warn_once(SMT_RSB_MSG);
+
+ if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) &&
+ kvm_can_mwait_in_guest())
+ kvm->arch.mwait_in_guest = true;
+ if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT)
+ kvm->arch.hlt_in_guest = true;
+ if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE)
+ kvm->arch.cstate_in_guest = true;
+ }
+
r = 0;
break;
case KVM_CAP_MSR_PLATFORM_INFO:
@@ -13456,6 +13475,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_exit);
static int __init kvm_x86_init(void)
{
kvm_mmu_x86_module_init();
+ mitigate_smt_rsb &= boot_cpu_has_bug(X86_BUG_SMT_RSB) && cpu_smt_possible();
return 0;
}
module_init(kvm_x86_init);
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 2e29bdc2949c..8fd41f5deae3 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -271,7 +271,15 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
* Attempt to obtain the GPC lock on *both* (if there are two)
* gfn_to_pfn caches that cover the region.
*/
- read_lock_irqsave(&gpc1->lock, flags);
+ if (atomic) {
+ local_irq_save(flags);
+ if (!read_trylock(&gpc1->lock)) {
+ local_irq_restore(flags);
+ return;
+ }
+ } else {
+ read_lock_irqsave(&gpc1->lock, flags);
+ }
while (!kvm_gpc_check(gpc1, user_len1)) {
read_unlock_irqrestore(&gpc1->lock, flags);
@@ -304,9 +312,18 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
* The guest's runstate_info is split across two pages and we
* need to hold and validate both GPCs simultaneously. We can
* declare a lock ordering GPC1 > GPC2 because nothing else
- * takes them more than one at a time.
+ * takes them more than one at a time. Set a subclass on the
+ * gpc1 lock to make lockdep shut up about it.
*/
- read_lock(&gpc2->lock);
+ lock_set_subclass(&gpc1->lock.dep_map, 1, _THIS_IP_);
+ if (atomic) {
+ if (!read_trylock(&gpc2->lock)) {
+ read_unlock_irqrestore(&gpc1->lock, flags);
+ return;
+ }
+ } else {
+ read_lock(&gpc2->lock);
+ }
if (!kvm_gpc_check(gpc2, user_len2)) {
read_unlock(&gpc2->lock);
@@ -590,26 +607,26 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
if (!IS_ENABLED(CONFIG_64BIT) && data->u.long_mode) {
r = -EINVAL;
} else {
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.xen.xen_lock);
kvm->arch.xen.long_mode = !!data->u.long_mode;
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.xen.xen_lock);
r = 0;
}
break;
case KVM_XEN_ATTR_TYPE_SHARED_INFO:
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.xen.xen_lock);
r = kvm_xen_shared_info_init(kvm, data->u.shared_info.gfn);
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.xen.xen_lock);
break;
case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR:
if (data->u.vector && data->u.vector < 0x10)
r = -EINVAL;
else {
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.xen.xen_lock);
kvm->arch.xen.upcall_vector = data->u.vector;
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.xen.xen_lock);
r = 0;
}
break;
@@ -619,9 +636,9 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
break;
case KVM_XEN_ATTR_TYPE_XEN_VERSION:
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.xen.xen_lock);
kvm->arch.xen.xen_version = data->u.xen_version;
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.xen.xen_lock);
r = 0;
break;
@@ -630,9 +647,9 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
r = -EOPNOTSUPP;
break;
}
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.xen.xen_lock);
kvm->arch.xen.runstate_update_flag = !!data->u.runstate_update_flag;
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.xen.xen_lock);
r = 0;
break;
@@ -647,7 +664,7 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
{
int r = -ENOENT;
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.xen.xen_lock);
switch (data->type) {
case KVM_XEN_ATTR_TYPE_LONG_MODE:
@@ -686,7 +703,7 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
break;
}
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.xen.xen_lock);
return r;
}
@@ -694,7 +711,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
{
int idx, r = -ENOENT;
- mutex_lock(&vcpu->kvm->lock);
+ mutex_lock(&vcpu->kvm->arch.xen.xen_lock);
idx = srcu_read_lock(&vcpu->kvm->srcu);
switch (data->type) {
@@ -922,7 +939,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
}
srcu_read_unlock(&vcpu->kvm->srcu, idx);
- mutex_unlock(&vcpu->kvm->lock);
+ mutex_unlock(&vcpu->kvm->arch.xen.xen_lock);
return r;
}
@@ -930,7 +947,7 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
{
int r = -ENOENT;
- mutex_lock(&vcpu->kvm->lock);
+ mutex_lock(&vcpu->kvm->arch.xen.xen_lock);
switch (data->type) {
case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO:
@@ -1013,7 +1030,7 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
break;
}
- mutex_unlock(&vcpu->kvm->lock);
+ mutex_unlock(&vcpu->kvm->arch.xen.xen_lock);
return r;
}
@@ -1106,7 +1123,7 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc)
xhc->blob_size_32 || xhc->blob_size_64))
return -EINVAL;
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.xen.xen_lock);
if (xhc->msr && !kvm->arch.xen_hvm_config.msr)
static_branch_inc(&kvm_xen_enabled.key);
@@ -1115,7 +1132,7 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc)
memcpy(&kvm->arch.xen_hvm_config, xhc, sizeof(*xhc));
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.xen.xen_lock);
return 0;
}
@@ -1658,15 +1675,7 @@ static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm)
mm_borrowed = true;
}
- /*
- * For the irqfd workqueue, using the main kvm->lock mutex is
- * fine since this function is invoked from kvm_set_irq() with
- * no other lock held, no srcu. In future if it will be called
- * directly from a vCPU thread (e.g. on hypercall for an IPI)
- * then it may need to switch to using a leaf-node mutex for
- * serializing the shared_info mapping.
- */
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.xen.xen_lock);
/*
* It is theoretically possible for the page to be unmapped
@@ -1695,7 +1704,7 @@ static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm)
srcu_read_unlock(&kvm->srcu, idx);
} while(!rc);
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.xen.xen_lock);
if (mm_borrowed)
kthread_unuse_mm(kvm->mm);
@@ -1811,7 +1820,7 @@ static int kvm_xen_eventfd_update(struct kvm *kvm,
int ret;
/* Protect writes to evtchnfd as well as the idr lookup. */
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.xen.xen_lock);
evtchnfd = idr_find(&kvm->arch.xen.evtchn_ports, port);
ret = -ENOENT;
@@ -1842,7 +1851,7 @@ static int kvm_xen_eventfd_update(struct kvm *kvm,
}
ret = 0;
out_unlock:
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.xen.xen_lock);
return ret;
}
@@ -1905,10 +1914,10 @@ static int kvm_xen_eventfd_assign(struct kvm *kvm,
evtchnfd->deliver.port.priority = data->u.evtchn.deliver.port.priority;
}
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.xen.xen_lock);
ret = idr_alloc(&kvm->arch.xen.evtchn_ports, evtchnfd, port, port + 1,
GFP_KERNEL);
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.xen.xen_lock);
if (ret >= 0)
return 0;
@@ -1926,9 +1935,9 @@ static int kvm_xen_eventfd_deassign(struct kvm *kvm, u32 port)
{
struct evtchnfd *evtchnfd;
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.xen.xen_lock);
evtchnfd = idr_remove(&kvm->arch.xen.evtchn_ports, port);
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.xen.xen_lock);
if (!evtchnfd)
return -ENOENT;
@@ -1946,7 +1955,7 @@ static int kvm_xen_eventfd_reset(struct kvm *kvm)
int i;
int n = 0;
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.xen.xen_lock);
/*
* Because synchronize_srcu() cannot be called inside the
@@ -1958,7 +1967,7 @@ static int kvm_xen_eventfd_reset(struct kvm *kvm)
all_evtchnfds = kmalloc_array(n, sizeof(struct evtchnfd *), GFP_KERNEL);
if (!all_evtchnfds) {
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.xen.xen_lock);
return -ENOMEM;
}
@@ -1967,7 +1976,7 @@ static int kvm_xen_eventfd_reset(struct kvm *kvm)
all_evtchnfds[n++] = evtchnfd;
idr_remove(&kvm->arch.xen.evtchn_ports, evtchnfd->send_port);
}
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.xen.xen_lock);
synchronize_srcu(&kvm->srcu);
@@ -2069,6 +2078,7 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
void kvm_xen_init_vm(struct kvm *kvm)
{
+ mutex_init(&kvm->arch.xen.xen_lock);
idr_init(&kvm->arch.xen.evtchn_ports);
kvm_gpc_init(&kvm->arch.xen.shinfo_cache, kvm, NULL, KVM_HOST_USES_PFN);
}
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index 21104c41cba0..558a605929db 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -1595,16 +1595,16 @@ bool insn_decode_from_regs(struct insn *insn, struct pt_regs *regs,
* Returns:
*
* Type of the instruction. Size of the memory operand is stored in
- * @bytes. If decode failed, MMIO_DECODE_FAILED returned.
+ * @bytes. If decode failed, INSN_MMIO_DECODE_FAILED returned.
*/
-enum mmio_type insn_decode_mmio(struct insn *insn, int *bytes)
+enum insn_mmio_type insn_decode_mmio(struct insn *insn, int *bytes)
{
- enum mmio_type type = MMIO_DECODE_FAILED;
+ enum insn_mmio_type type = INSN_MMIO_DECODE_FAILED;
*bytes = 0;
if (insn_get_opcode(insn))
- return MMIO_DECODE_FAILED;
+ return INSN_MMIO_DECODE_FAILED;
switch (insn->opcode.bytes[0]) {
case 0x88: /* MOV m8,r8 */
@@ -1613,7 +1613,7 @@ enum mmio_type insn_decode_mmio(struct insn *insn, int *bytes)
case 0x89: /* MOV m16/m32/m64, r16/m32/m64 */
if (!*bytes)
*bytes = insn->opnd_bytes;
- type = MMIO_WRITE;
+ type = INSN_MMIO_WRITE;
break;
case 0xc6: /* MOV m8, imm8 */
@@ -1622,7 +1622,7 @@ enum mmio_type insn_decode_mmio(struct insn *insn, int *bytes)
case 0xc7: /* MOV m16/m32/m64, imm16/imm32/imm64 */
if (!*bytes)
*bytes = insn->opnd_bytes;
- type = MMIO_WRITE_IMM;
+ type = INSN_MMIO_WRITE_IMM;
break;
case 0x8a: /* MOV r8, m8 */
@@ -1631,7 +1631,7 @@ enum mmio_type insn_decode_mmio(struct insn *insn, int *bytes)
case 0x8b: /* MOV r16/r32/r64, m16/m32/m64 */
if (!*bytes)
*bytes = insn->opnd_bytes;
- type = MMIO_READ;
+ type = INSN_MMIO_READ;
break;
case 0xa4: /* MOVS m8, m8 */
@@ -1640,7 +1640,7 @@ enum mmio_type insn_decode_mmio(struct insn *insn, int *bytes)
case 0xa5: /* MOVS m16/m32/m64, m16/m32/m64 */
if (!*bytes)
*bytes = insn->opnd_bytes;
- type = MMIO_MOVS;
+ type = INSN_MMIO_MOVS;
break;
case 0x0f: /* Two-byte instruction */
@@ -1651,7 +1651,7 @@ enum mmio_type insn_decode_mmio(struct insn *insn, int *bytes)
case 0xb7: /* MOVZX r32/r64, m16 */
if (!*bytes)
*bytes = 2;
- type = MMIO_READ_ZERO_EXTEND;
+ type = INSN_MMIO_READ_ZERO_EXTEND;
break;
case 0xbe: /* MOVSX r16/r32/r64, m8 */
@@ -1660,7 +1660,7 @@ enum mmio_type insn_decode_mmio(struct insn *insn, int *bytes)
case 0xbf: /* MOVSX r32/r64, m16 */
if (!*bytes)
*bytes = 2;
- type = MMIO_READ_SIGN_EXTEND;
+ type = INSN_MMIO_READ_SIGN_EXTEND;
break;
}
break;
diff --git a/arch/x86/lib/iomap_copy_64.S b/arch/x86/lib/iomap_copy_64.S
index a1f9416bf67a..6ff2f56cb0f7 100644
--- a/arch/x86/lib/iomap_copy_64.S
+++ b/arch/x86/lib/iomap_copy_64.S
@@ -10,6 +10,6 @@
*/
SYM_FUNC_START(__iowrite32_copy)
movl %edx,%ecx
- rep movsd
+ rep movsl
RET
SYM_FUNC_END(__iowrite32_copy)
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index d3987359d441..cb258f58fdc8 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -26,6 +26,7 @@
#include <asm/pti.h>
#include <asm/text-patching.h>
#include <asm/memtype.h>
+#include <asm/paravirt.h>
/*
* We need to define the tracepoints somewhere, and tlb.c
@@ -804,6 +805,9 @@ void __init poking_init(void)
poking_mm = mm_alloc();
BUG_ON(!poking_mm);
+ /* Xen PV guests need the PGD to be pinned. */
+ paravirt_arch_dup_mmap(NULL, poking_mm);
+
/*
* Randomize the poking address, but make sure that the following page
* will be mapped at the same PMD. We need 2 pages, so find space for 3,
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 758cbfe55daa..4b3efaa82ab7 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -12,6 +12,7 @@
*/
#include <linux/acpi.h>
+#include <linux/efi.h>
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/bitmap.h>
@@ -442,17 +443,42 @@ static bool is_acpi_reserved(u64 start, u64 end, enum e820_type not_used)
return mcfg_res.flags;
}
+static bool is_efi_mmio(u64 start, u64 end, enum e820_type not_used)
+{
+#ifdef CONFIG_EFI
+ efi_memory_desc_t *md;
+ u64 size, mmio_start, mmio_end;
+
+ for_each_efi_memory_desc(md) {
+ if (md->type == EFI_MEMORY_MAPPED_IO) {
+ size = md->num_pages << EFI_PAGE_SHIFT;
+ mmio_start = md->phys_addr;
+ mmio_end = mmio_start + size;
+
+ /*
+ * N.B. Caller supplies (start, start + size),
+ * so to match, mmio_end is the first address
+ * *past* the EFI_MEMORY_MAPPED_IO area.
+ */
+ if (mmio_start <= start && end <= mmio_end)
+ return true;
+ }
+ }
+#endif
+
+ return false;
+}
+
typedef bool (*check_reserved_t)(u64 start, u64 end, enum e820_type type);
static bool __ref is_mmconf_reserved(check_reserved_t is_reserved,
struct pci_mmcfg_region *cfg,
- struct device *dev, int with_e820)
+ struct device *dev, const char *method)
{
u64 addr = cfg->res.start;
u64 size = resource_size(&cfg->res);
u64 old_size = size;
int num_buses;
- char *method = with_e820 ? "E820" : "ACPI motherboard resources";
while (!is_reserved(addr, addr + size, E820_TYPE_RESERVED)) {
size >>= 1;
@@ -464,10 +490,10 @@ static bool __ref is_mmconf_reserved(check_reserved_t is_reserved,
return false;
if (dev)
- dev_info(dev, "MMCONFIG at %pR reserved in %s\n",
+ dev_info(dev, "MMCONFIG at %pR reserved as %s\n",
&cfg->res, method);
else
- pr_info(PREFIX "MMCONFIG at %pR reserved in %s\n",
+ pr_info(PREFIX "MMCONFIG at %pR reserved as %s\n",
&cfg->res, method);
if (old_size != size) {
@@ -500,7 +526,8 @@ static bool __ref
pci_mmcfg_check_reserved(struct device *dev, struct pci_mmcfg_region *cfg, int early)
{
if (!early && !acpi_disabled) {
- if (is_mmconf_reserved(is_acpi_reserved, cfg, dev, 0))
+ if (is_mmconf_reserved(is_acpi_reserved, cfg, dev,
+ "ACPI motherboard resource"))
return true;
if (dev)
@@ -513,6 +540,10 @@ pci_mmcfg_check_reserved(struct device *dev, struct pci_mmcfg_region *cfg, int e
"MMCONFIG at %pR not reserved in "
"ACPI motherboard resources\n",
&cfg->res);
+
+ if (is_mmconf_reserved(is_efi_mmio, cfg, dev,
+ "EfiMemoryMappedIO"))
+ return true;
}
/*
@@ -527,7 +558,8 @@ pci_mmcfg_check_reserved(struct device *dev, struct pci_mmcfg_region *cfg, int e
/* Don't try to do this check unless configuration
type 1 is available. how about type 2 ?*/
if (raw_pci_ops)
- return is_mmconf_reserved(e820__mapped_all, cfg, dev, 1);
+ return is_mmconf_reserved(e820__mapped_all, cfg, dev,
+ "E820 entry");
return false;
}
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index b94f727251b6..8babce71915f 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -392,6 +392,7 @@ static void xen_teardown_msi_irqs(struct pci_dev *dev)
msi_for_each_desc(msidesc, &dev->dev, MSI_DESC_ASSOCIATED) {
for (i = 0; i < msidesc->nvec_used; i++)
xen_destroy_irq(msidesc->irq + i);
+ msidesc->irq = 0;
}
}
@@ -433,6 +434,7 @@ static struct msi_domain_ops xen_pci_msi_domain_ops = {
};
static struct msi_domain_info xen_pci_msi_domain_info = {
+ .flags = MSI_FLAG_PCI_MSIX | MSI_FLAG_FREE_MSI_DESCS | MSI_FLAG_DEV_SYSFS,
.ops = &xen_pci_msi_domain_ops,
};
diff --git a/arch/x86/um/elfcore.c b/arch/x86/um/elfcore.c
index 48a3eb09d951..650cdbbdaf45 100644
--- a/arch/x86/um/elfcore.c
+++ b/arch/x86/um/elfcore.c
@@ -7,7 +7,7 @@
#include <asm/elf.h>
-Elf32_Half elf_core_extra_phdrs(void)
+Elf32_Half elf_core_extra_phdrs(struct coredump_params *cprm)
{
return vsyscall_ehdr ? (((struct elfhdr *)vsyscall_ehdr)->e_phnum) : 0;
}
@@ -60,7 +60,7 @@ int elf_core_write_extra_data(struct coredump_params *cprm)
return 1;
}
-size_t elf_core_extra_data_size(void)
+size_t elf_core_extra_data_size(struct coredump_params *cprm)
{
if ( vsyscall_ehdr ) {
const struct elfhdr *const ehdrp =
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 58db86f7b384..9bdc3b656b2c 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -134,11 +134,6 @@ static inline unsigned p2m_mid_index(unsigned long pfn)
return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE;
}
-static inline unsigned p2m_index(unsigned long pfn)
-{
- return pfn % P2M_PER_PAGE;
-}
-
static void p2m_top_mfn_init(unsigned long *top)
{
unsigned i;
diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index 228e4dff5fb2..a6d09fe04831 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -154,11 +154,6 @@ struct thread_struct {
unsigned long ra; /* kernel's a0: return address and window call size */
unsigned long sp; /* kernel's a1: stack pointer */
- /* struct xtensa_cpuinfo info; */
-
- unsigned long bad_vaddr; /* last user fault */
- unsigned long bad_uaddr; /* last kernel fault accessing user space */
- unsigned long error_code;
#ifdef CONFIG_HAVE_HW_BREAKPOINT
struct perf_event *ptrace_bp[XCHAL_NUM_IBREAK];
struct perf_event *ptrace_wp[XCHAL_NUM_DBREAK];
@@ -176,10 +171,6 @@ struct thread_struct {
{ \
ra: 0, \
sp: sizeof(init_stack) + (long) &init_stack, \
- /*info: {0}, */ \
- bad_vaddr: 0, \
- bad_uaddr: 0, \
- error_code: 0, \
}
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index 0c25e035ff10..cd98366a9b23 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -362,8 +362,6 @@ static void do_unaligned_user(struct pt_regs *regs)
__die_if_kernel("Unhandled unaligned exception in kernel",
regs, SIGKILL);
- current->thread.bad_vaddr = regs->excvaddr;
- current->thread.error_code = -3;
pr_info_ratelimited("Unaligned memory access to %08lx in '%s' "
"(pid = %d, pc = %#010lx)\n",
regs->excvaddr, current->comm,
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index 8c781b05c0bd..faf7cf35a0ee 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -206,8 +206,6 @@ good_area:
bad_area:
mmap_read_unlock(mm);
if (user_mode(regs)) {
- current->thread.bad_vaddr = address;
- current->thread.error_code = is_write;
force_sig_fault(SIGSEGV, code, (void *) address);
return;
}
@@ -232,7 +230,6 @@ do_sigbus:
/* Send a sigbus, regardless of whether we were in kernel
* or user mode.
*/
- current->thread.bad_vaddr = address;
force_sig_fault(SIGBUS, BUS_ADRERR, (void *) address);
/* Kernel mode? Handle exceptions or die */
@@ -252,7 +249,6 @@ bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
if ((entry = search_exception_tables(regs->pc)) != NULL) {
pr_debug("%s: Exception at pc=%#010lx (%lx)\n",
current->comm, regs->pc, entry->fixup);
- current->thread.bad_uaddr = address;
regs->pc = entry->fixup;
return;
}
diff --git a/block/Kconfig b/block/Kconfig
index 444c5ab3b67e..5d9d9c84d516 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -6,7 +6,6 @@ menuconfig BLOCK
bool "Enable the block layer" if EXPERT
default y
select SBITMAP
- select SRCU
help
Provide block layer support for the kernel.
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 1b2829e99dad..0fbde0fc0628 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -316,14 +316,12 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
static void bfqg_get(struct bfq_group *bfqg)
{
- bfqg->ref++;
+ refcount_inc(&bfqg->ref);
}
static void bfqg_put(struct bfq_group *bfqg)
{
- bfqg->ref--;
-
- if (bfqg->ref == 0)
+ if (refcount_dec_and_test(&bfqg->ref))
kfree(bfqg);
}
@@ -530,7 +528,7 @@ static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, struct request_queue *q,
}
/* see comments in bfq_bic_update_cgroup for why refcounting */
- bfqg_get(bfqg);
+ refcount_set(&bfqg->ref, 1);
return &bfqg->pd;
}
@@ -771,8 +769,8 @@ static void __bfq_bic_change_cgroup(struct bfq_data *bfqd,
* request from the old cgroup.
*/
bfq_put_cooperator(sync_bfqq);
- bfq_release_process_ref(bfqd, sync_bfqq);
bic_set_bfqq(bic, NULL, true);
+ bfq_release_process_ref(bfqd, sync_bfqq);
}
}
}
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index ccf2204477a5..380e9bda2e57 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -5425,9 +5425,11 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio)
bfqq = bic_to_bfqq(bic, false);
if (bfqq) {
- bfq_release_process_ref(bfqd, bfqq);
+ struct bfq_queue *old_bfqq = bfqq;
+
bfqq = bfq_get_queue(bfqd, bio, false, bic, true);
bic_set_bfqq(bic, bfqq, false);
+ bfq_release_process_ref(bfqd, old_bfqq);
}
bfqq = bic_to_bfqq(bic, true);
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 41aa151ccc22..466e4865ace6 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -928,7 +928,7 @@ struct bfq_group {
char blkg_path[128];
/* reference counter (see comments in bfq_bic_update_cgroup) */
- int ref;
+ refcount_t ref;
/* Is bfq_group still online? */
bool online;
diff --git a/block/bio.c b/block/bio.c
index 5f96fcae3f75..ab59a491a883 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1401,6 +1401,27 @@ void __bio_advance(struct bio *bio, unsigned bytes)
}
EXPORT_SYMBOL(__bio_advance);
+void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
+ struct bio *src, struct bvec_iter *src_iter)
+{
+ while (src_iter->bi_size && dst_iter->bi_size) {
+ struct bio_vec src_bv = bio_iter_iovec(src, *src_iter);
+ struct bio_vec dst_bv = bio_iter_iovec(dst, *dst_iter);
+ unsigned int bytes = min(src_bv.bv_len, dst_bv.bv_len);
+ void *src_buf = bvec_kmap_local(&src_bv);
+ void *dst_buf = bvec_kmap_local(&dst_bv);
+
+ memcpy(dst_buf, src_buf, bytes);
+
+ kunmap_local(dst_buf);
+ kunmap_local(src_buf);
+
+ bio_advance_iter_single(src, src_iter, bytes);
+ bio_advance_iter_single(dst, dst_iter, bytes);
+ }
+}
+EXPORT_SYMBOL(bio_copy_data_iter);
+
/**
* bio_copy_data - copy contents of data buffers from one bio to another
* @src: source bio
@@ -1414,21 +1435,7 @@ void bio_copy_data(struct bio *dst, struct bio *src)
struct bvec_iter src_iter = src->bi_iter;
struct bvec_iter dst_iter = dst->bi_iter;
- while (src_iter.bi_size && dst_iter.bi_size) {
- struct bio_vec src_bv = bio_iter_iovec(src, src_iter);
- struct bio_vec dst_bv = bio_iter_iovec(dst, dst_iter);
- unsigned int bytes = min(src_bv.bv_len, dst_bv.bv_len);
- void *src_buf = bvec_kmap_local(&src_bv);
- void *dst_buf = bvec_kmap_local(&dst_bv);
-
- memcpy(dst_buf, src_buf, bytes);
-
- kunmap_local(dst_buf);
- kunmap_local(src_buf);
-
- bio_advance_iter_single(src, &src_iter, bytes);
- bio_advance_iter_single(dst, &dst_iter, bytes);
- }
+ bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
}
EXPORT_SYMBOL(bio_copy_data);
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index ce6a2b7d3dfb..9ac1efb053e0 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1455,6 +1455,10 @@ retry:
list_for_each_entry_reverse(blkg, &q->blkg_list, q_node)
pol->pd_init_fn(blkg->pd[pol->plid]);
+ if (pol->pd_online_fn)
+ list_for_each_entry_reverse(blkg, &q->blkg_list, q_node)
+ pol->pd_online_fn(blkg->pd[pol->plid]);
+
__set_bit(pol->plid, q->blkcg_pols);
ret = 0;
@@ -1997,6 +2001,10 @@ void blk_cgroup_bio_start(struct bio *bio)
struct blkg_iostat_set *bis;
unsigned long flags;
+ /* Root-level stats are sourced from system-wide IO stats */
+ if (!cgroup_parent(blkcg->css.cgroup))
+ return;
+
cpu = get_cpu();
bis = per_cpu_ptr(bio->bi_blkg->iostat_cpu, cpu);
flags = u64_stats_update_begin_irqsave(&bis->sync);
diff --git a/block/blk-core.c b/block/blk-core.c
index 9321767470dc..b5098355d8b2 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -283,12 +283,9 @@ static void blk_free_queue(struct request_queue *q)
*
* Decrements the refcount of the request_queue and free it when the refcount
* reaches 0.
- *
- * Context: Can sleep.
*/
void blk_put_queue(struct request_queue *q)
{
- might_sleep();
if (refcount_dec_and_test(&q->refs))
blk_free_queue(q);
}
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 35a8f75cc45d..64bf7d9dd8e8 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -276,7 +276,7 @@ static bool bvec_split_segs(const struct queue_limits *lim,
* responsible for ensuring that @bs is only destroyed after processing of the
* split bio has finished.
*/
-static struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
+struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
unsigned *segs, struct bio_set *bs, unsigned max_bytes)
{
struct bio_vec bv, bvprv, *bvprvp = NULL;
@@ -309,6 +309,16 @@ static struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
*segs = nsegs;
return NULL;
split:
+ /*
+ * We can't sanely support splitting for a REQ_NOWAIT bio. End it
+ * with EAGAIN if splitting is required and return an error pointer.
+ */
+ if (bio->bi_opf & REQ_NOWAIT) {
+ bio->bi_status = BLK_STS_AGAIN;
+ bio_endio(bio);
+ return ERR_PTR(-EAGAIN);
+ }
+
*segs = nsegs;
/*
@@ -326,6 +336,7 @@ split:
bio_clear_polled(bio);
return bio_split(bio, bytes >> SECTOR_SHIFT, GFP_NOIO, bs);
}
+EXPORT_SYMBOL_GPL(bio_split_rw);
/**
* __bio_split_to_limits - split a bio to fit the queue limits
@@ -358,11 +369,13 @@ struct bio *__bio_split_to_limits(struct bio *bio,
default:
split = bio_split_rw(bio, lim, nr_segs, bs,
get_max_io_size(bio, lim) << SECTOR_SHIFT);
+ if (IS_ERR(split))
+ return NULL;
break;
}
if (split) {
- /* there isn't chance to merge the splitted bio */
+ /* there isn't chance to merge the split bio */
split->bi_opf |= REQ_NOMERGE;
blkcg_bio_issue_init(split);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index c5cf0dbca1db..9c8dc70020bc 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2890,6 +2890,7 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
struct blk_plug *plug, struct bio **bio, unsigned int nsegs)
{
struct request *rq;
+ enum hctx_type type, hctx_type;
if (!plug)
return NULL;
@@ -2902,7 +2903,10 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
return NULL;
}
- if (blk_mq_get_hctx_type((*bio)->bi_opf) != rq->mq_hctx->type)
+ type = blk_mq_get_hctx_type((*bio)->bi_opf);
+ hctx_type = rq->mq_hctx->type;
+ if (type != hctx_type &&
+ !(type == HCTX_TYPE_READ && hctx_type == HCTX_TYPE_DEFAULT))
return NULL;
if (op_is_flush(rq->cmd_flags) != op_is_flush((*bio)->bi_opf))
return NULL;
@@ -2951,8 +2955,11 @@ void blk_mq_submit_bio(struct bio *bio)
blk_status_t ret;
bio = blk_queue_bounce(bio, q);
- if (bio_may_exceed_limits(bio, &q->limits))
+ if (bio_may_exceed_limits(bio, &q->limits)) {
bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
+ if (!bio)
+ return;
+ }
if (!bio_integrity_prep(bio))
return;
@@ -4062,8 +4069,9 @@ EXPORT_SYMBOL(blk_mq_init_queue);
* blk_mq_destroy_queue - shutdown a request queue
* @q: request queue to shutdown
*
- * This shuts down a request queue allocated by blk_mq_init_queue() and drops
- * the initial reference. All future requests will failed with -ENODEV.
+ * This shuts down a request queue allocated by blk_mq_init_queue(). All future
+ * requests will be failed with -ENODEV. The caller is responsible for dropping
+ * the reference from blk_mq_init_queue() by calling blk_put_queue().
*
* Context: can sleep
*/
diff --git a/block/genhd.c b/block/genhd.c
index ab3cbe44196f..23cf83b3331c 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1201,10 +1201,21 @@ struct class block_class = {
.dev_uevent = block_uevent,
};
+static char *block_devnode(struct device *dev, umode_t *mode,
+ kuid_t *uid, kgid_t *gid)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+
+ if (disk->fops->devnode)
+ return disk->fops->devnode(disk, mode);
+ return NULL;
+}
+
const struct device_type disk_type = {
.name = "disk",
.groups = disk_attr_groups,
.release = disk_release,
+ .devnode = block_devnode,
};
#ifdef CONFIG_PROC_FS
diff --git a/certs/Makefile b/certs/Makefile
index 9486ed924731..799ad7b9e68a 100644
--- a/certs/Makefile
+++ b/certs/Makefile
@@ -23,8 +23,8 @@ $(obj)/blacklist_hash_list: $(CONFIG_SYSTEM_BLACKLIST_HASH_LIST) FORCE
targets += blacklist_hash_list
quiet_cmd_extract_certs = CERT $@
- cmd_extract_certs = $(obj)/extract-cert $(extract-cert-in) $@
-extract-cert-in = $(or $(filter-out $(obj)/extract-cert, $(real-prereqs)),"")
+ cmd_extract_certs = $(obj)/extract-cert "$(extract-cert-in)" $@
+extract-cert-in = $(filter-out $(obj)/extract-cert, $(real-prereqs))
$(obj)/system_certificates.o: $(obj)/x509_certificate_list
diff --git a/certs/blacklist.c b/certs/blacklist.c
index 41f10601cc72..675dd7a8f07a 100644
--- a/certs/blacklist.c
+++ b/certs/blacklist.c
@@ -183,16 +183,19 @@ static int mark_raw_hash_blacklisted(const char *hash)
{
key_ref_t key;
- key = key_create_or_update(make_key_ref(blacklist_keyring, true),
- "blacklist",
- hash,
- NULL,
- 0,
- BLACKLIST_KEY_PERM,
- KEY_ALLOC_NOT_IN_QUOTA |
- KEY_ALLOC_BUILT_IN);
+ key = key_create(make_key_ref(blacklist_keyring, true),
+ "blacklist",
+ hash,
+ NULL,
+ 0,
+ BLACKLIST_KEY_PERM,
+ KEY_ALLOC_NOT_IN_QUOTA |
+ KEY_ALLOC_BUILT_IN);
if (IS_ERR(key)) {
- pr_err("Problem blacklisting hash (%ld)\n", PTR_ERR(key));
+ if (PTR_ERR(key) == -EEXIST)
+ pr_warn("Duplicate blacklisted hash %s\n", hash);
+ else
+ pr_err("Problem blacklisting hash %s: %pe\n", hash, key);
return PTR_ERR(key);
}
return 0;
diff --git a/crypto/asymmetric_keys/Kconfig b/crypto/asymmetric_keys/Kconfig
index 3df3fe4ed95f..1ef3b46d6f6e 100644
--- a/crypto/asymmetric_keys/Kconfig
+++ b/crypto/asymmetric_keys/Kconfig
@@ -83,6 +83,6 @@ config FIPS_SIGNATURE_SELFTEST
for FIPS.
depends on KEYS
depends on ASYMMETRIC_KEY_TYPE
- depends on PKCS7_MESSAGE_PARSER
+ depends on PKCS7_MESSAGE_PARSER=X509_CERTIFICATE_PARSER
endif # ASYMMETRIC_KEY_TYPE
diff --git a/crypto/asymmetric_keys/pkcs7_verify.c b/crypto/asymmetric_keys/pkcs7_verify.c
index f6321c785714..4fa769c4bcdb 100644
--- a/crypto/asymmetric_keys/pkcs7_verify.c
+++ b/crypto/asymmetric_keys/pkcs7_verify.c
@@ -485,3 +485,4 @@ int pkcs7_supply_detached_data(struct pkcs7_message *pkcs7,
pkcs7->data_len = datalen;
return 0;
}
+EXPORT_SYMBOL_GPL(pkcs7_supply_detached_data);
diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c
index 2f8352e88860..eca5671ad3f2 100644
--- a/crypto/asymmetric_keys/public_key.c
+++ b/crypto/asymmetric_keys/public_key.c
@@ -186,8 +186,28 @@ static int software_key_query(const struct kernel_pkey_params *params,
len = crypto_akcipher_maxsize(tfm);
info->key_size = len * 8;
- info->max_data_size = len;
- info->max_sig_size = len;
+
+ if (strncmp(pkey->pkey_algo, "ecdsa", 5) == 0) {
+ /*
+ * ECDSA key sizes are much smaller than RSA, and thus could
+ * operate on (hashed) inputs that are larger than key size.
+ * For example SHA384-hashed input used with secp256r1
+ * based keys. Set max_data_size to be at least as large as
+ * the largest supported hash size (SHA512)
+ */
+ info->max_data_size = 64;
+
+ /*
+ * Verify takes ECDSA-Sig (described in RFC 5480) as input,
+ * which is actually 2 'key_size'-bit integers encoded in
+ * ASN.1. Account for the ASN.1 encoding overhead here.
+ */
+ info->max_sig_size = 2 * (len + 3) + 2;
+ } else {
+ info->max_data_size = len;
+ info->max_sig_size = len;
+ }
+
info->max_enc_size = len;
info->max_dec_size = len;
info->supported_ops = (KEYCTL_SUPPORTS_ENCRYPT |
diff --git a/drivers/accessibility/speakup/spk_ttyio.c b/drivers/accessibility/speakup/spk_ttyio.c
index 08cf8a17754b..07373b3debd1 100644
--- a/drivers/accessibility/speakup/spk_ttyio.c
+++ b/drivers/accessibility/speakup/spk_ttyio.c
@@ -354,6 +354,9 @@ void spk_ttyio_release(struct spk_synth *in_synth)
{
struct tty_struct *tty = in_synth->dev;
+ if (tty == NULL)
+ return;
+
tty_lock(tty);
if (tty->ops->close)
diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
index 204fe94c7e45..a194f30876c5 100644
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -75,7 +75,8 @@ static struct acpi_bus_type *acpi_get_bus_type(struct device *dev)
}
#define FIND_CHILD_MIN_SCORE 1
-#define FIND_CHILD_MAX_SCORE 2
+#define FIND_CHILD_MID_SCORE 2
+#define FIND_CHILD_MAX_SCORE 3
static int match_any(struct acpi_device *adev, void *not_used)
{
@@ -96,8 +97,17 @@ static int find_child_checks(struct acpi_device *adev, bool check_children)
return -ENODEV;
status = acpi_evaluate_integer(adev->handle, "_STA", NULL, &sta);
- if (status == AE_NOT_FOUND)
+ if (status == AE_NOT_FOUND) {
+ /*
+ * Special case: backlight device objects without _STA are
+ * preferred to other objects with the same _ADR value, because
+ * it is more likely that they are actually useful.
+ */
+ if (adev->pnp.type.backlight)
+ return FIND_CHILD_MID_SCORE;
+
return FIND_CHILD_MIN_SCORE;
+ }
if (ACPI_FAILURE(status) || !(sta & ACPI_STA_DEVICE_ENABLED))
return -ENODEV;
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index f1cc5ec6a3b6..4e48d6db05eb 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -3297,8 +3297,8 @@ void acpi_nfit_shutdown(void *data)
mutex_lock(&acpi_desc->init_mutex);
set_bit(ARS_CANCEL, &acpi_desc->scrub_flags);
- cancel_delayed_work_sync(&acpi_desc->dwork);
mutex_unlock(&acpi_desc->init_mutex);
+ cancel_delayed_work_sync(&acpi_desc->dwork);
/*
* Bounce the nvdimm bus lock to make sure any in-flight
diff --git a/drivers/acpi/prmt.c b/drivers/acpi/prmt.c
index 998101cf16e4..3d4c4620f9f9 100644
--- a/drivers/acpi/prmt.c
+++ b/drivers/acpi/prmt.c
@@ -236,6 +236,11 @@ static acpi_status acpi_platformrt_space_handler(u32 function,
efi_status_t status;
struct prm_context_buffer context;
+ if (!efi_enabled(EFI_RUNTIME_SERVICES)) {
+ pr_err_ratelimited("PRM: EFI runtime services no longer available\n");
+ return AE_NO_HANDLER;
+ }
+
/*
* The returned acpi_status will always be AE_OK. Error values will be
* saved in the first byte of the PRM message buffer to be used by ASL.
@@ -325,6 +330,11 @@ void __init init_prmt(void)
pr_info("PRM: found %u modules\n", mc);
+ if (!efi_enabled(EFI_RUNTIME_SERVICES)) {
+ pr_err("PRM: EFI runtime services unavailable\n");
+ return;
+ }
+
status = acpi_install_address_space_handler(ACPI_ROOT_OBJECT,
ACPI_ADR_SPACE_PLATFORM_RT,
&acpi_platformrt_space_handler,
diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
index 16dcd31d124f..192d1784e409 100644
--- a/drivers/acpi/resource.c
+++ b/drivers/acpi/resource.c
@@ -433,6 +433,13 @@ static const struct dmi_system_id asus_laptop[] = {
},
},
{
+ .ident = "Asus ExpertBook B2402CBA",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+ DMI_MATCH(DMI_BOARD_NAME, "B2402CBA"),
+ },
+ },
+ {
.ident = "Asus ExpertBook B2502",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 274344434282..0c6f06abe3f4 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1370,9 +1370,12 @@ static void acpi_set_pnp_ids(acpi_handle handle, struct acpi_device_pnp *pnp,
* Some devices don't reliably have _HIDs & _CIDs, so add
* synthetic HIDs to make sure drivers can find them.
*/
- if (acpi_is_video_device(handle))
+ if (acpi_is_video_device(handle)) {
acpi_add_id(pnp, ACPI_VIDEO_HID);
- else if (acpi_bay_match(handle))
+ pnp->type.backlight = 1;
+ break;
+ }
+ if (acpi_bay_match(handle))
acpi_add_id(pnp, ACPI_BAY_HID);
else if (acpi_dock_match(handle))
acpi_add_id(pnp, ACPI_DOCK_HID);
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 0b557c0d405e..4ca667251272 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -60,13 +60,17 @@ static struct notifier_block tts_notifier = {
.priority = 0,
};
+#ifndef acpi_skip_set_wakeup_address
+#define acpi_skip_set_wakeup_address() false
+#endif
+
static int acpi_sleep_prepare(u32 acpi_state)
{
#ifdef CONFIG_ACPI_SLEEP
unsigned long acpi_wakeup_address;
/* do we have a wakeup address for S2 and S3? */
- if (acpi_state == ACPI_STATE_S3) {
+ if (acpi_state == ACPI_STATE_S3 && !acpi_skip_set_wakeup_address()) {
acpi_wakeup_address = acpi_get_wakeup_address();
if (!acpi_wakeup_address)
return -EFAULT;
diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
index 1b78c7434492..a8c02608dde4 100644
--- a/drivers/acpi/video_detect.c
+++ b/drivers/acpi/video_detect.c
@@ -50,6 +50,10 @@ static void acpi_video_parse_cmdline(void)
acpi_backlight_cmdline = acpi_backlight_video;
if (!strcmp("native", acpi_video_backlight_string))
acpi_backlight_cmdline = acpi_backlight_native;
+ if (!strcmp("nvidia_wmi_ec", acpi_video_backlight_string))
+ acpi_backlight_cmdline = acpi_backlight_nvidia_wmi_ec;
+ if (!strcmp("apple_gmux", acpi_video_backlight_string))
+ acpi_backlight_cmdline = acpi_backlight_apple_gmux;
if (!strcmp("none", acpi_video_backlight_string))
acpi_backlight_cmdline = acpi_backlight_none;
}
@@ -106,26 +110,6 @@ static bool nvidia_wmi_ec_supported(void)
}
#endif
-static bool apple_gmux_backlight_present(void)
-{
- struct acpi_device *adev;
- struct device *dev;
-
- adev = acpi_dev_get_first_match_dev(GMUX_ACPI_HID, NULL, -1);
- if (!adev)
- return false;
-
- dev = acpi_get_first_physical_node(adev);
- if (!dev)
- return false;
-
- /*
- * drivers/platform/x86/apple-gmux.c only supports old style
- * Apple GMUX with an IO-resource.
- */
- return pnp_get_resource(to_pnp_dev(dev), IORESOURCE_IO, 0) != NULL;
-}
-
/* Force to use vendor driver when the ACPI device is known to be
* buggy */
static int video_detect_force_vendor(const struct dmi_system_id *d)
@@ -513,6 +497,14 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
},
{
.callback = video_detect_force_native,
+ /* Acer Aspire 4810T */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 4810T"),
+ },
+ },
+ {
+ .callback = video_detect_force_native,
/* Acer Aspire 5738z */
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
@@ -600,6 +592,14 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
},
{
.callback = video_detect_force_native,
+ /* Asus U46E */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "U46E"),
+ },
+ },
+ {
+ .callback = video_detect_force_native,
/* Asus UX303UB */
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
@@ -608,6 +608,23 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
},
{
.callback = video_detect_force_native,
+ /* HP EliteBook 8460p */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP EliteBook 8460p"),
+ },
+ },
+ {
+ .callback = video_detect_force_native,
+ /* HP Pavilion g6-1d80nr / B4U19UA */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion g6 Notebook PC"),
+ DMI_MATCH(DMI_PRODUCT_SKU, "B4U19UA"),
+ },
+ },
+ {
+ .callback = video_detect_force_native,
/* Samsung N150P */
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."),
@@ -754,6 +771,7 @@ static enum acpi_backlight_type __acpi_video_get_backlight_type(bool native)
{
static DEFINE_MUTEX(init_mutex);
static bool nvidia_wmi_ec_present;
+ static bool apple_gmux_present;
static bool native_available;
static bool init_done;
static long video_caps;
@@ -767,6 +785,7 @@ static enum acpi_backlight_type __acpi_video_get_backlight_type(bool native)
ACPI_UINT32_MAX, find_video, NULL,
&video_caps, NULL);
nvidia_wmi_ec_present = nvidia_wmi_ec_supported();
+ apple_gmux_present = apple_gmux_detect(NULL, NULL);
init_done = true;
}
if (native)
@@ -788,7 +807,7 @@ static enum acpi_backlight_type __acpi_video_get_backlight_type(bool native)
if (nvidia_wmi_ec_present)
return acpi_backlight_nvidia_wmi_ec;
- if (apple_gmux_backlight_present())
+ if (apple_gmux_present)
return acpi_backlight_apple_gmux;
/* Use ACPI video if available, except when native should be preferred. */
diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c
index 09b2ce7e4c34..348d63d1e3d3 100644
--- a/drivers/android/binderfs.c
+++ b/drivers/android/binderfs.c
@@ -352,7 +352,7 @@ static inline bool is_binderfs_control_device(const struct dentry *dentry)
return info->control_dentry == dentry;
}
-static int binderfs_rename(struct user_namespace *mnt_userns,
+static int binderfs_rename(struct mnt_idmap *idmap,
struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
@@ -361,7 +361,7 @@ static int binderfs_rename(struct user_namespace *mnt_userns,
is_binderfs_control_device(new_dentry))
return -EPERM;
- return simple_rename(&init_user_ns, old_dir, old_dentry, new_dir,
+ return simple_rename(idmap, old_dir, old_dentry, new_dir,
new_dentry, flags);
}
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index eceaec33af65..9695c4404e26 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -640,6 +640,7 @@ config PATA_CS5530
config PATA_CS5535
tristate "CS5535 PATA support (Experimental)"
depends on PCI && (X86_32 || (X86_64 && COMPILE_TEST))
+ depends on !UML
help
This option enables support for the NatSemi/AMD CS5535
companion chip used with the Geode processor family.
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 14a1c0d14916..3bb9bb483fe3 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -421,6 +421,7 @@ static const struct pci_device_id ahci_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, 0x34d3), board_ahci_low_power }, /* Ice Lake LP AHCI */
{ PCI_VDEVICE(INTEL, 0x02d3), board_ahci_low_power }, /* Comet Lake PCH-U AHCI */
{ PCI_VDEVICE(INTEL, 0x02d7), board_ahci_low_power }, /* Comet Lake PCH RAID */
+ { PCI_VDEVICE(INTEL, 0xa0d3), board_ahci_low_power }, /* Tiger Lake UP{3,4} AHCI */
/* JMicron 360/1/3/5/6, match class to avoid IDE function */
{ PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 884ae73b11ea..c4c89d24f84c 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -3109,7 +3109,7 @@ int sata_down_spd_limit(struct ata_link *link, u32 spd_limit)
*/
if (spd > 1)
mask &= (1 << (spd - 1)) - 1;
- else
+ else if (link->sata_spd)
return -EINVAL;
/* were we already at the bottom? */
@@ -4045,6 +4045,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
{ "Samsung SSD 870*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
ATA_HORKAGE_ZERO_AFTER_TRIM |
ATA_HORKAGE_NO_NCQ_ON_ATI },
+ { "SAMSUNG*MZ7LH*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
+ ATA_HORKAGE_ZERO_AFTER_TRIM |
+ ATA_HORKAGE_NO_NCQ_ON_ATI, },
{ "FCCT*M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
ATA_HORKAGE_ZERO_AFTER_TRIM },
diff --git a/drivers/ata/pata_octeon_cf.c b/drivers/ata/pata_octeon_cf.c
index 35608a0cf552..4cbcdc5da038 100644
--- a/drivers/ata/pata_octeon_cf.c
+++ b/drivers/ata/pata_octeon_cf.c
@@ -67,7 +67,7 @@ module_param(enable_dma, int, 0444);
MODULE_PARM_DESC(enable_dma,
"Enable use of DMA on interfaces that support it (0=no dma [default], 1=use dma)");
-/**
+/*
* Convert nanosecond based time to setting used in the
* boot bus timing register, based on timing multiple
*/
@@ -114,7 +114,7 @@ static void octeon_cf_set_boot_reg_cfg(int cs, unsigned int multiplier)
cvmx_write_csr(CVMX_MIO_BOOT_REG_CFGX(cs), reg_cfg.u64);
}
-/**
+/*
* Called after libata determines the needed PIO mode. This
* function programs the Octeon bootbus regions to support the
* timing requirements of the PIO mode.
@@ -278,7 +278,7 @@ static void octeon_cf_set_dmamode(struct ata_port *ap, struct ata_device *dev)
cvmx_write_csr(cf_port->dma_base + DMA_TIM, dma_tim.u64);
}
-/**
+/*
* Handle an 8 bit I/O request.
*
* @qc: Queued command
@@ -317,7 +317,7 @@ static unsigned int octeon_cf_data_xfer8(struct ata_queued_cmd *qc,
return buflen;
}
-/**
+/*
* Handle a 16 bit I/O request.
*
* @qc: Queued command
@@ -372,7 +372,7 @@ static unsigned int octeon_cf_data_xfer16(struct ata_queued_cmd *qc,
return buflen;
}
-/**
+/*
* Read the taskfile for 16bit non-True IDE only.
*/
static void octeon_cf_tf_read16(struct ata_port *ap, struct ata_taskfile *tf)
@@ -453,7 +453,7 @@ static int octeon_cf_softreset16(struct ata_link *link, unsigned int *classes,
return 0;
}
-/**
+/*
* Load the taskfile for 16bit non-True IDE only. The device_addr is
* not loaded, we do this as part of octeon_cf_exec_command16.
*/
@@ -525,7 +525,7 @@ static void octeon_cf_dma_setup(struct ata_queued_cmd *qc)
ap->ops->sff_exec_command(ap, &qc->tf);
}
-/**
+/*
* Start a DMA transfer that was already setup
*
* @qc: Information about the DMA
@@ -580,7 +580,7 @@ static void octeon_cf_dma_start(struct ata_queued_cmd *qc)
cvmx_write_csr(cf_port->dma_base + DMA_CFG, mio_boot_dma_cfg.u64);
}
-/**
+/*
*
* LOCKING:
* spin_lock_irqsave(host lock)
diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index e4bffeabf344..03e8a95f1f35 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -173,7 +173,7 @@ static int dev_mkdir(const char *name, umode_t mode)
if (IS_ERR(dentry))
return PTR_ERR(dentry);
- err = vfs_mkdir(&init_user_ns, d_inode(path.dentry), dentry, mode);
+ err = vfs_mkdir(&nop_mnt_idmap, d_inode(path.dentry), dentry, mode);
if (!err)
/* mark as kernel-created inode */
d_inode(dentry)->i_private = &thread;
@@ -223,7 +223,7 @@ static int handle_create(const char *nodename, umode_t mode, kuid_t uid,
if (IS_ERR(dentry))
return PTR_ERR(dentry);
- err = vfs_mknod(&init_user_ns, d_inode(path.dentry), dentry, mode,
+ err = vfs_mknod(&nop_mnt_idmap, d_inode(path.dentry), dentry, mode,
dev->devt);
if (!err) {
struct iattr newattrs;
@@ -233,7 +233,7 @@ static int handle_create(const char *nodename, umode_t mode, kuid_t uid,
newattrs.ia_gid = gid;
newattrs.ia_valid = ATTR_MODE|ATTR_UID|ATTR_GID;
inode_lock(d_inode(dentry));
- notify_change(&init_user_ns, dentry, &newattrs, NULL);
+ notify_change(&nop_mnt_idmap, dentry, &newattrs, NULL);
inode_unlock(d_inode(dentry));
/* mark as kernel-created inode */
@@ -254,7 +254,7 @@ static int dev_rmdir(const char *name)
return PTR_ERR(dentry);
if (d_really_is_positive(dentry)) {
if (d_inode(dentry)->i_private == &thread)
- err = vfs_rmdir(&init_user_ns, d_inode(parent.dentry),
+ err = vfs_rmdir(&nop_mnt_idmap, d_inode(parent.dentry),
dentry);
else
err = -EPERM;
@@ -341,9 +341,9 @@ static int handle_remove(const char *nodename, struct device *dev)
newattrs.ia_valid =
ATTR_UID|ATTR_GID|ATTR_MODE;
inode_lock(d_inode(dentry));
- notify_change(&init_user_ns, dentry, &newattrs, NULL);
+ notify_change(&nop_mnt_idmap, dentry, &newattrs, NULL);
inode_unlock(d_inode(dentry));
- err = vfs_unlink(&init_user_ns, d_inode(parent.dentry),
+ err = vfs_unlink(&nop_mnt_idmap, d_inode(parent.dentry),
dentry, NULL);
if (!err || err == -ENOENT)
deleted = 1;
diff --git a/drivers/base/property.c b/drivers/base/property.c
index bbb3e499ff4a..083a95791d3b 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -997,26 +997,32 @@ struct fwnode_handle *
fwnode_graph_get_next_endpoint(const struct fwnode_handle *fwnode,
struct fwnode_handle *prev)
{
+ struct fwnode_handle *ep, *port_parent = NULL;
const struct fwnode_handle *parent;
- struct fwnode_handle *ep;
/*
* If this function is in a loop and the previous iteration returned
* an endpoint from fwnode->secondary, then we need to use the secondary
* as parent rather than @fwnode.
*/
- if (prev)
- parent = fwnode_graph_get_port_parent(prev);
- else
+ if (prev) {
+ port_parent = fwnode_graph_get_port_parent(prev);
+ parent = port_parent;
+ } else {
parent = fwnode;
+ }
if (IS_ERR_OR_NULL(parent))
return NULL;
ep = fwnode_call_ptr_op(parent, graph_get_next_endpoint, prev);
if (ep)
- return ep;
+ goto out_put_port_parent;
+
+ ep = fwnode_graph_get_next_endpoint(parent->secondary, NULL);
- return fwnode_graph_get_next_endpoint(parent->secondary, NULL);
+out_put_port_parent:
+ fwnode_handle_put(port_parent);
+ return ep;
}
EXPORT_SYMBOL_GPL(fwnode_graph_get_next_endpoint);
diff --git a/drivers/base/test/test_async_driver_probe.c b/drivers/base/test/test_async_driver_probe.c
index 4d1976ca5072..929410d0dd6f 100644
--- a/drivers/base/test/test_async_driver_probe.c
+++ b/drivers/base/test/test_async_driver_probe.c
@@ -145,7 +145,7 @@ static int __init test_async_probe_init(void)
calltime = ktime_get();
for_each_online_cpu(cpu) {
nid = cpu_to_node(cpu);
- pdev = &sync_dev[sync_id];
+ pdev = &async_dev[async_id];
*pdev = test_platform_device_register_node("test_async_driver",
async_id,
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index a2184b428493..a41145d52de9 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -285,6 +285,49 @@ config BLK_DEV_RAM_SIZE
The default value is 4096 kilobytes. Only change this if you know
what you are doing.
+config CDROM_PKTCDVD
+ tristate "Packet writing on CD/DVD media (DEPRECATED)"
+ depends on !UML
+ depends on SCSI
+ select CDROM
+ help
+ Note: This driver is deprecated and will be removed from the
+ kernel in the near future!
+
+ If you have a CDROM/DVD drive that supports packet writing, say
+ Y to include support. It should work with any MMC/Mt Fuji
+ compliant ATAPI or SCSI drive, which is just about any newer
+ DVD/CD writer.
+
+ Currently only writing to CD-RW, DVD-RW, DVD+RW and DVDRAM discs
+ is possible.
+ DVD-RW disks must be in restricted overwrite mode.
+
+ See the file <file:Documentation/cdrom/packet-writing.rst>
+ for further information on the use of this driver.
+
+ To compile this driver as a module, choose M here: the
+ module will be called pktcdvd.
+
+config CDROM_PKTCDVD_BUFFERS
+ int "Free buffers for data gathering"
+ depends on CDROM_PKTCDVD
+ default "8"
+ help
+ This controls the maximum number of active concurrent packets. More
+ concurrent packets can increase write performance, but also require
+ more memory. Each concurrent packet will require approximately 64Kb
+ of non-swappable kernel memory, memory which will be allocated when
+ a disc is opened for writing.
+
+config CDROM_PKTCDVD_WCACHE
+ bool "Enable write caching"
+ depends on CDROM_PKTCDVD
+ help
+ If enabled, write caching will be set for the CD-R/W device. For now
+ this option is dangerous unless the CD-RW media is known good, as we
+ don't do deferred write error handling yet.
+
config ATA_OVER_ETH
tristate "ATA over Ethernet support"
depends on NET
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 962ee65d8ca3..101612cba303 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o
obj-$(CONFIG_N64CART) += n64cart.o
obj-$(CONFIG_BLK_DEV_RAM) += brd.o
obj-$(CONFIG_BLK_DEV_LOOP) += loop.o
+obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o
obj-$(CONFIG_SUNVDC) += sunvdc.o
obj-$(CONFIG_BLK_DEV_NBD) += nbd.o
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index eb14ec8ec04c..e36216d50753 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -1607,6 +1607,8 @@ void drbd_submit_bio(struct bio *bio)
struct drbd_device *device = bio->bi_bdev->bd_disk->private_data;
bio = bio_split_to_limits(bio);
+ if (!bio)
+ return;
/*
* what we "blindly" assume:
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
new file mode 100644
index 000000000000..2f1a92509271
--- /dev/null
+++ b/drivers/block/pktcdvd.c
@@ -0,0 +1,2946 @@
+/*
+ * Copyright (C) 2000 Jens Axboe <axboe@suse.de>
+ * Copyright (C) 2001-2004 Peter Osterlund <petero2@telia.com>
+ * Copyright (C) 2006 Thomas Maier <balagi@justmail.de>
+ *
+ * May be copied or modified under the terms of the GNU General Public
+ * License. See linux/COPYING for more information.
+ *
+ * Packet writing layer for ATAPI and SCSI CD-RW, DVD+RW, DVD-RW and
+ * DVD-RAM devices.
+ *
+ * Theory of operation:
+ *
+ * At the lowest level, there is the standard driver for the CD/DVD device,
+ * such as drivers/scsi/sr.c. This driver can handle read and write requests,
+ * but it doesn't know anything about the special restrictions that apply to
+ * packet writing. One restriction is that write requests must be aligned to
+ * packet boundaries on the physical media, and the size of a write request
+ * must be equal to the packet size. Another restriction is that a
+ * GPCMD_FLUSH_CACHE command has to be issued to the drive before a read
+ * command, if the previous command was a write.
+ *
+ * The purpose of the packet writing driver is to hide these restrictions from
+ * higher layers, such as file systems, and present a block device that can be
+ * randomly read and written using 2kB-sized blocks.
+ *
+ * The lowest layer in the packet writing driver is the packet I/O scheduler.
+ * Its data is defined by the struct packet_iosched and includes two bio
+ * queues with pending read and write requests. These queues are processed
+ * by the pkt_iosched_process_queue() function. The write requests in this
+ * queue are already properly aligned and sized. This layer is responsible for
+ * issuing the flush cache commands and scheduling the I/O in a good order.
+ *
+ * The next layer transforms unaligned write requests to aligned writes. This
+ * transformation requires reading missing pieces of data from the underlying
+ * block device, assembling the pieces to full packets and queuing them to the
+ * packet I/O scheduler.
+ *
+ * At the top layer there is a custom ->submit_bio function that forwards
+ * read requests directly to the iosched queue and puts write requests in the
+ * unaligned write queue. A kernel thread performs the necessary read
+ * gathering to convert the unaligned writes to aligned writes and then feeds
+ * them to the packet I/O scheduler.
+ *
+ *************************************************************************/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/pktcdvd.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/compat.h>
+#include <linux/kthread.h>
+#include <linux/errno.h>
+#include <linux/spinlock.h>
+#include <linux/file.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/miscdevice.h>
+#include <linux/freezer.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/backing-dev.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_ioctl.h>
+#include <scsi/scsi.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/nospec.h>
+#include <linux/uaccess.h>
+
+#define DRIVER_NAME "pktcdvd"
+
+#define pkt_err(pd, fmt, ...) \
+ pr_err("%s: " fmt, pd->name, ##__VA_ARGS__)
+#define pkt_notice(pd, fmt, ...) \
+ pr_notice("%s: " fmt, pd->name, ##__VA_ARGS__)
+#define pkt_info(pd, fmt, ...) \
+ pr_info("%s: " fmt, pd->name, ##__VA_ARGS__)
+
+#define pkt_dbg(level, pd, fmt, ...) \
+do { \
+ if (level == 2 && PACKET_DEBUG >= 2) \
+ pr_notice("%s: %s():" fmt, \
+ pd->name, __func__, ##__VA_ARGS__); \
+ else if (level == 1 && PACKET_DEBUG >= 1) \
+ pr_notice("%s: " fmt, pd->name, ##__VA_ARGS__); \
+} while (0)
+
+#define MAX_SPEED 0xffff
+
+static DEFINE_MUTEX(pktcdvd_mutex);
+static struct pktcdvd_device *pkt_devs[MAX_WRITERS];
+static struct proc_dir_entry *pkt_proc;
+static int pktdev_major;
+static int write_congestion_on = PKT_WRITE_CONGESTION_ON;
+static int write_congestion_off = PKT_WRITE_CONGESTION_OFF;
+static struct mutex ctl_mutex; /* Serialize open/close/setup/teardown */
+static mempool_t psd_pool;
+static struct bio_set pkt_bio_set;
+
+static struct class *class_pktcdvd = NULL; /* /sys/class/pktcdvd */
+static struct dentry *pkt_debugfs_root = NULL; /* /sys/kernel/debug/pktcdvd */
+
+/* forward declaration */
+static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev);
+static int pkt_remove_dev(dev_t pkt_dev);
+static int pkt_seq_show(struct seq_file *m, void *p);
+
+static sector_t get_zone(sector_t sector, struct pktcdvd_device *pd)
+{
+ return (sector + pd->offset) & ~(sector_t)(pd->settings.size - 1);
+}
+
+/**********************************************************
+ * sysfs interface for pktcdvd
+ * by (C) 2006 Thomas Maier <balagi@justmail.de>
+
+ /sys/class/pktcdvd/pktcdvd[0-7]/
+ stat/reset
+ stat/packets_started
+ stat/packets_finished
+ stat/kb_written
+ stat/kb_read
+ stat/kb_read_gather
+ write_queue/size
+ write_queue/congestion_off
+ write_queue/congestion_on
+ **********************************************************/
+
+static ssize_t packets_started_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pktcdvd_device *pd = dev_get_drvdata(dev);
+
+ return sysfs_emit(buf, "%lu\n", pd->stats.pkt_started);
+}
+static DEVICE_ATTR_RO(packets_started);
+
+static ssize_t packets_finished_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pktcdvd_device *pd = dev_get_drvdata(dev);
+
+ return sysfs_emit(buf, "%lu\n", pd->stats.pkt_ended);
+}
+static DEVICE_ATTR_RO(packets_finished);
+
+static ssize_t kb_written_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pktcdvd_device *pd = dev_get_drvdata(dev);
+
+ return sysfs_emit(buf, "%lu\n", pd->stats.secs_w >> 1);
+}
+static DEVICE_ATTR_RO(kb_written);
+
+static ssize_t kb_read_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pktcdvd_device *pd = dev_get_drvdata(dev);
+
+ return sysfs_emit(buf, "%lu\n", pd->stats.secs_r >> 1);
+}
+static DEVICE_ATTR_RO(kb_read);
+
+static ssize_t kb_read_gather_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pktcdvd_device *pd = dev_get_drvdata(dev);
+
+ return sysfs_emit(buf, "%lu\n", pd->stats.secs_rg >> 1);
+}
+static DEVICE_ATTR_RO(kb_read_gather);
+
+static ssize_t reset_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct pktcdvd_device *pd = dev_get_drvdata(dev);
+
+ if (len > 0) {
+ pd->stats.pkt_started = 0;
+ pd->stats.pkt_ended = 0;
+ pd->stats.secs_w = 0;
+ pd->stats.secs_rg = 0;
+ pd->stats.secs_r = 0;
+ }
+ return len;
+}
+static DEVICE_ATTR_WO(reset);
+
+static struct attribute *pkt_stat_attrs[] = {
+ &dev_attr_packets_finished.attr,
+ &dev_attr_packets_started.attr,
+ &dev_attr_kb_read.attr,
+ &dev_attr_kb_written.attr,
+ &dev_attr_kb_read_gather.attr,
+ &dev_attr_reset.attr,
+ NULL,
+};
+
+static const struct attribute_group pkt_stat_group = {
+ .name = "stat",
+ .attrs = pkt_stat_attrs,
+};
+
+static ssize_t size_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pktcdvd_device *pd = dev_get_drvdata(dev);
+ int n;
+
+ spin_lock(&pd->lock);
+ n = sysfs_emit(buf, "%d\n", pd->bio_queue_size);
+ spin_unlock(&pd->lock);
+ return n;
+}
+static DEVICE_ATTR_RO(size);
+
+static void init_write_congestion_marks(int* lo, int* hi)
+{
+ if (*hi > 0) {
+ *hi = max(*hi, 500);
+ *hi = min(*hi, 1000000);
+ if (*lo <= 0)
+ *lo = *hi - 100;
+ else {
+ *lo = min(*lo, *hi - 100);
+ *lo = max(*lo, 100);
+ }
+ } else {
+ *hi = -1;
+ *lo = -1;
+ }
+}
+
+static ssize_t congestion_off_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pktcdvd_device *pd = dev_get_drvdata(dev);
+ int n;
+
+ spin_lock(&pd->lock);
+ n = sysfs_emit(buf, "%d\n", pd->write_congestion_off);
+ spin_unlock(&pd->lock);
+ return n;
+}
+
+static ssize_t congestion_off_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct pktcdvd_device *pd = dev_get_drvdata(dev);
+ int val;
+
+ if (sscanf(buf, "%d", &val) == 1) {
+ spin_lock(&pd->lock);
+ pd->write_congestion_off = val;
+ init_write_congestion_marks(&pd->write_congestion_off,
+ &pd->write_congestion_on);
+ spin_unlock(&pd->lock);
+ }
+ return len;
+}
+static DEVICE_ATTR_RW(congestion_off);
+
+static ssize_t congestion_on_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pktcdvd_device *pd = dev_get_drvdata(dev);
+ int n;
+
+ spin_lock(&pd->lock);
+ n = sysfs_emit(buf, "%d\n", pd->write_congestion_on);
+ spin_unlock(&pd->lock);
+ return n;
+}
+
+static ssize_t congestion_on_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct pktcdvd_device *pd = dev_get_drvdata(dev);
+ int val;
+
+ if (sscanf(buf, "%d", &val) == 1) {
+ spin_lock(&pd->lock);
+ pd->write_congestion_on = val;
+ init_write_congestion_marks(&pd->write_congestion_off,
+ &pd->write_congestion_on);
+ spin_unlock(&pd->lock);
+ }
+ return len;
+}
+static DEVICE_ATTR_RW(congestion_on);
+
+static struct attribute *pkt_wq_attrs[] = {
+ &dev_attr_congestion_on.attr,
+ &dev_attr_congestion_off.attr,
+ &dev_attr_size.attr,
+ NULL,
+};
+
+static const struct attribute_group pkt_wq_group = {
+ .name = "write_queue",
+ .attrs = pkt_wq_attrs,
+};
+
+static const struct attribute_group *pkt_groups[] = {
+ &pkt_stat_group,
+ &pkt_wq_group,
+ NULL,
+};
+
+static void pkt_sysfs_dev_new(struct pktcdvd_device *pd)
+{
+ if (class_pktcdvd) {
+ pd->dev = device_create_with_groups(class_pktcdvd, NULL,
+ MKDEV(0, 0), pd, pkt_groups,
+ "%s", pd->name);
+ if (IS_ERR(pd->dev))
+ pd->dev = NULL;
+ }
+}
+
+static void pkt_sysfs_dev_remove(struct pktcdvd_device *pd)
+{
+ if (class_pktcdvd)
+ device_unregister(pd->dev);
+}
+
+
+/********************************************************************
+ /sys/class/pktcdvd/
+ add map block device
+ remove unmap packet dev
+ device_map show mappings
+ *******************************************************************/
+
+static void class_pktcdvd_release(struct class *cls)
+{
+ kfree(cls);
+}
+
+static ssize_t device_map_show(struct class *c, struct class_attribute *attr,
+ char *data)
+{
+ int n = 0;
+ int idx;
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+ for (idx = 0; idx < MAX_WRITERS; idx++) {
+ struct pktcdvd_device *pd = pkt_devs[idx];
+ if (!pd)
+ continue;
+ n += sprintf(data+n, "%s %u:%u %u:%u\n",
+ pd->name,
+ MAJOR(pd->pkt_dev), MINOR(pd->pkt_dev),
+ MAJOR(pd->bdev->bd_dev),
+ MINOR(pd->bdev->bd_dev));
+ }
+ mutex_unlock(&ctl_mutex);
+ return n;
+}
+static CLASS_ATTR_RO(device_map);
+
+static ssize_t add_store(struct class *c, struct class_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned int major, minor;
+
+ if (sscanf(buf, "%u:%u", &major, &minor) == 2) {
+ /* pkt_setup_dev() expects caller to hold reference to self */
+ if (!try_module_get(THIS_MODULE))
+ return -ENODEV;
+
+ pkt_setup_dev(MKDEV(major, minor), NULL);
+
+ module_put(THIS_MODULE);
+
+ return count;
+ }
+
+ return -EINVAL;
+}
+static CLASS_ATTR_WO(add);
+
+static ssize_t remove_store(struct class *c, struct class_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned int major, minor;
+ if (sscanf(buf, "%u:%u", &major, &minor) == 2) {
+ pkt_remove_dev(MKDEV(major, minor));
+ return count;
+ }
+ return -EINVAL;
+}
+static CLASS_ATTR_WO(remove);
+
+static struct attribute *class_pktcdvd_attrs[] = {
+ &class_attr_add.attr,
+ &class_attr_remove.attr,
+ &class_attr_device_map.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(class_pktcdvd);
+
+static int pkt_sysfs_init(void)
+{
+ int ret = 0;
+
+ /*
+ * create control files in sysfs
+ * /sys/class/pktcdvd/...
+ */
+ class_pktcdvd = kzalloc(sizeof(*class_pktcdvd), GFP_KERNEL);
+ if (!class_pktcdvd)
+ return -ENOMEM;
+ class_pktcdvd->name = DRIVER_NAME;
+ class_pktcdvd->owner = THIS_MODULE;
+ class_pktcdvd->class_release = class_pktcdvd_release;
+ class_pktcdvd->class_groups = class_pktcdvd_groups;
+ ret = class_register(class_pktcdvd);
+ if (ret) {
+ kfree(class_pktcdvd);
+ class_pktcdvd = NULL;
+ pr_err("failed to create class pktcdvd\n");
+ return ret;
+ }
+ return 0;
+}
+
+static void pkt_sysfs_cleanup(void)
+{
+ if (class_pktcdvd)
+ class_destroy(class_pktcdvd);
+ class_pktcdvd = NULL;
+}
+
+/********************************************************************
+ entries in debugfs
+
+ /sys/kernel/debug/pktcdvd[0-7]/
+ info
+
+ *******************************************************************/
+
+static int pkt_debugfs_seq_show(struct seq_file *m, void *p)
+{
+ return pkt_seq_show(m, p);
+}
+
+static int pkt_debugfs_fops_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, pkt_debugfs_seq_show, inode->i_private);
+}
+
+static const struct file_operations debug_fops = {
+ .open = pkt_debugfs_fops_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ .owner = THIS_MODULE,
+};
+
+static void pkt_debugfs_dev_new(struct pktcdvd_device *pd)
+{
+ if (!pkt_debugfs_root)
+ return;
+ pd->dfs_d_root = debugfs_create_dir(pd->name, pkt_debugfs_root);
+ if (!pd->dfs_d_root)
+ return;
+
+ pd->dfs_f_info = debugfs_create_file("info", 0444,
+ pd->dfs_d_root, pd, &debug_fops);
+}
+
+static void pkt_debugfs_dev_remove(struct pktcdvd_device *pd)
+{
+ if (!pkt_debugfs_root)
+ return;
+ debugfs_remove(pd->dfs_f_info);
+ debugfs_remove(pd->dfs_d_root);
+ pd->dfs_f_info = NULL;
+ pd->dfs_d_root = NULL;
+}
+
+static void pkt_debugfs_init(void)
+{
+ pkt_debugfs_root = debugfs_create_dir(DRIVER_NAME, NULL);
+}
+
+static void pkt_debugfs_cleanup(void)
+{
+ debugfs_remove(pkt_debugfs_root);
+ pkt_debugfs_root = NULL;
+}
+
+/* ----------------------------------------------------------*/
+
+
+static void pkt_bio_finished(struct pktcdvd_device *pd)
+{
+ BUG_ON(atomic_read(&pd->cdrw.pending_bios) <= 0);
+ if (atomic_dec_and_test(&pd->cdrw.pending_bios)) {
+ pkt_dbg(2, pd, "queue empty\n");
+ atomic_set(&pd->iosched.attention, 1);
+ wake_up(&pd->wqueue);
+ }
+}
+
+/*
+ * Allocate a packet_data struct
+ */
+static struct packet_data *pkt_alloc_packet_data(int frames)
+{
+ int i;
+ struct packet_data *pkt;
+
+ pkt = kzalloc(sizeof(struct packet_data), GFP_KERNEL);
+ if (!pkt)
+ goto no_pkt;
+
+ pkt->frames = frames;
+ pkt->w_bio = bio_kmalloc(frames, GFP_KERNEL);
+ if (!pkt->w_bio)
+ goto no_bio;
+
+ for (i = 0; i < frames / FRAMES_PER_PAGE; i++) {
+ pkt->pages[i] = alloc_page(GFP_KERNEL|__GFP_ZERO);
+ if (!pkt->pages[i])
+ goto no_page;
+ }
+
+ spin_lock_init(&pkt->lock);
+ bio_list_init(&pkt->orig_bios);
+
+ for (i = 0; i < frames; i++) {
+ pkt->r_bios[i] = bio_kmalloc(1, GFP_KERNEL);
+ if (!pkt->r_bios[i])
+ goto no_rd_bio;
+ }
+
+ return pkt;
+
+no_rd_bio:
+ for (i = 0; i < frames; i++)
+ kfree(pkt->r_bios[i]);
+no_page:
+ for (i = 0; i < frames / FRAMES_PER_PAGE; i++)
+ if (pkt->pages[i])
+ __free_page(pkt->pages[i]);
+ kfree(pkt->w_bio);
+no_bio:
+ kfree(pkt);
+no_pkt:
+ return NULL;
+}
+
+/*
+ * Free a packet_data struct
+ */
+static void pkt_free_packet_data(struct packet_data *pkt)
+{
+ int i;
+
+ for (i = 0; i < pkt->frames; i++)
+ kfree(pkt->r_bios[i]);
+ for (i = 0; i < pkt->frames / FRAMES_PER_PAGE; i++)
+ __free_page(pkt->pages[i]);
+ kfree(pkt->w_bio);
+ kfree(pkt);
+}
+
+static void pkt_shrink_pktlist(struct pktcdvd_device *pd)
+{
+ struct packet_data *pkt, *next;
+
+ BUG_ON(!list_empty(&pd->cdrw.pkt_active_list));
+
+ list_for_each_entry_safe(pkt, next, &pd->cdrw.pkt_free_list, list) {
+ pkt_free_packet_data(pkt);
+ }
+ INIT_LIST_HEAD(&pd->cdrw.pkt_free_list);
+}
+
+static int pkt_grow_pktlist(struct pktcdvd_device *pd, int nr_packets)
+{
+ struct packet_data *pkt;
+
+ BUG_ON(!list_empty(&pd->cdrw.pkt_free_list));
+
+ while (nr_packets > 0) {
+ pkt = pkt_alloc_packet_data(pd->settings.size >> 2);
+ if (!pkt) {
+ pkt_shrink_pktlist(pd);
+ return 0;
+ }
+ pkt->id = nr_packets;
+ pkt->pd = pd;
+ list_add(&pkt->list, &pd->cdrw.pkt_free_list);
+ nr_packets--;
+ }
+ return 1;
+}
+
+static inline struct pkt_rb_node *pkt_rbtree_next(struct pkt_rb_node *node)
+{
+ struct rb_node *n = rb_next(&node->rb_node);
+ if (!n)
+ return NULL;
+ return rb_entry(n, struct pkt_rb_node, rb_node);
+}
+
+static void pkt_rbtree_erase(struct pktcdvd_device *pd, struct pkt_rb_node *node)
+{
+ rb_erase(&node->rb_node, &pd->bio_queue);
+ mempool_free(node, &pd->rb_pool);
+ pd->bio_queue_size--;
+ BUG_ON(pd->bio_queue_size < 0);
+}
+
+/*
+ * Find the first node in the pd->bio_queue rb tree with a starting sector >= s.
+ */
+static struct pkt_rb_node *pkt_rbtree_find(struct pktcdvd_device *pd, sector_t s)
+{
+ struct rb_node *n = pd->bio_queue.rb_node;
+ struct rb_node *next;
+ struct pkt_rb_node *tmp;
+
+ if (!n) {
+ BUG_ON(pd->bio_queue_size > 0);
+ return NULL;
+ }
+
+ for (;;) {
+ tmp = rb_entry(n, struct pkt_rb_node, rb_node);
+ if (s <= tmp->bio->bi_iter.bi_sector)
+ next = n->rb_left;
+ else
+ next = n->rb_right;
+ if (!next)
+ break;
+ n = next;
+ }
+
+ if (s > tmp->bio->bi_iter.bi_sector) {
+ tmp = pkt_rbtree_next(tmp);
+ if (!tmp)
+ return NULL;
+ }
+ BUG_ON(s > tmp->bio->bi_iter.bi_sector);
+ return tmp;
+}
+
+/*
+ * Insert a node into the pd->bio_queue rb tree.
+ */
+static void pkt_rbtree_insert(struct pktcdvd_device *pd, struct pkt_rb_node *node)
+{
+ struct rb_node **p = &pd->bio_queue.rb_node;
+ struct rb_node *parent = NULL;
+ sector_t s = node->bio->bi_iter.bi_sector;
+ struct pkt_rb_node *tmp;
+
+ while (*p) {
+ parent = *p;
+ tmp = rb_entry(parent, struct pkt_rb_node, rb_node);
+ if (s < tmp->bio->bi_iter.bi_sector)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+ rb_link_node(&node->rb_node, parent, p);
+ rb_insert_color(&node->rb_node, &pd->bio_queue);
+ pd->bio_queue_size++;
+}
+
+/*
+ * Send a packet_command to the underlying block device and
+ * wait for completion.
+ */
+static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *cgc)
+{
+ struct request_queue *q = bdev_get_queue(pd->bdev);
+ struct scsi_cmnd *scmd;
+ struct request *rq;
+ int ret = 0;
+
+ rq = scsi_alloc_request(q, (cgc->data_direction == CGC_DATA_WRITE) ?
+ REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+ scmd = blk_mq_rq_to_pdu(rq);
+
+ if (cgc->buflen) {
+ ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
+ GFP_NOIO);
+ if (ret)
+ goto out;
+ }
+
+ scmd->cmd_len = COMMAND_SIZE(cgc->cmd[0]);
+ memcpy(scmd->cmnd, cgc->cmd, CDROM_PACKET_SIZE);
+
+ rq->timeout = 60*HZ;
+ if (cgc->quiet)
+ rq->rq_flags |= RQF_QUIET;
+
+ blk_execute_rq(rq, false);
+ if (scmd->result)
+ ret = -EIO;
+out:
+ blk_mq_free_request(rq);
+ return ret;
+}
+
+static const char *sense_key_string(__u8 index)
+{
+ static const char * const info[] = {
+ "No sense", "Recovered error", "Not ready",
+ "Medium error", "Hardware error", "Illegal request",
+ "Unit attention", "Data protect", "Blank check",
+ };
+
+ return index < ARRAY_SIZE(info) ? info[index] : "INVALID";
+}
+
+/*
+ * A generic sense dump / resolve mechanism should be implemented across
+ * all ATAPI + SCSI devices.
+ */
+static void pkt_dump_sense(struct pktcdvd_device *pd,
+ struct packet_command *cgc)
+{
+ struct scsi_sense_hdr *sshdr = cgc->sshdr;
+
+ if (sshdr)
+ pkt_err(pd, "%*ph - sense %02x.%02x.%02x (%s)\n",
+ CDROM_PACKET_SIZE, cgc->cmd,
+ sshdr->sense_key, sshdr->asc, sshdr->ascq,
+ sense_key_string(sshdr->sense_key));
+ else
+ pkt_err(pd, "%*ph - no sense\n", CDROM_PACKET_SIZE, cgc->cmd);
+}
+
+/*
+ * flush the drive cache to media
+ */
+static int pkt_flush_cache(struct pktcdvd_device *pd)
+{
+ struct packet_command cgc;
+
+ init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
+ cgc.cmd[0] = GPCMD_FLUSH_CACHE;
+ cgc.quiet = 1;
+
+ /*
+ * the IMMED bit -- we default to not setting it, although that
+ * would allow a much faster close, this is safer
+ */
+#if 0
+ cgc.cmd[1] = 1 << 1;
+#endif
+ return pkt_generic_packet(pd, &cgc);
+}
+
+/*
+ * speed is given as the normal factor, e.g. 4 for 4x
+ */
+static noinline_for_stack int pkt_set_speed(struct pktcdvd_device *pd,
+ unsigned write_speed, unsigned read_speed)
+{
+ struct packet_command cgc;
+ struct scsi_sense_hdr sshdr;
+ int ret;
+
+ init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
+ cgc.sshdr = &sshdr;
+ cgc.cmd[0] = GPCMD_SET_SPEED;
+ cgc.cmd[2] = (read_speed >> 8) & 0xff;
+ cgc.cmd[3] = read_speed & 0xff;
+ cgc.cmd[4] = (write_speed >> 8) & 0xff;
+ cgc.cmd[5] = write_speed & 0xff;
+
+ ret = pkt_generic_packet(pd, &cgc);
+ if (ret)
+ pkt_dump_sense(pd, &cgc);
+
+ return ret;
+}
+
+/*
+ * Queue a bio for processing by the low-level CD device. Must be called
+ * from process context.
+ */
+static void pkt_queue_bio(struct pktcdvd_device *pd, struct bio *bio)
+{
+ spin_lock(&pd->iosched.lock);
+ if (bio_data_dir(bio) == READ)
+ bio_list_add(&pd->iosched.read_queue, bio);
+ else
+ bio_list_add(&pd->iosched.write_queue, bio);
+ spin_unlock(&pd->iosched.lock);
+
+ atomic_set(&pd->iosched.attention, 1);
+ wake_up(&pd->wqueue);
+}
+
+/*
+ * Process the queued read/write requests. This function handles special
+ * requirements for CDRW drives:
+ * - A cache flush command must be inserted before a read request if the
+ * previous request was a write.
+ * - Switching between reading and writing is slow, so don't do it more often
+ * than necessary.
+ * - Optimize for throughput at the expense of latency. This means that streaming
+ * writes will never be interrupted by a read, but if the drive has to seek
+ * before the next write, switch to reading instead if there are any pending
+ * read requests.
+ * - Set the read speed according to current usage pattern. When only reading
+ * from the device, it's best to use the highest possible read speed, but
+ * when switching often between reading and writing, it's better to have the
+ * same read and write speeds.
+ */
+static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
+{
+
+ if (atomic_read(&pd->iosched.attention) == 0)
+ return;
+ atomic_set(&pd->iosched.attention, 0);
+
+ for (;;) {
+ struct bio *bio;
+ int reads_queued, writes_queued;
+
+ spin_lock(&pd->iosched.lock);
+ reads_queued = !bio_list_empty(&pd->iosched.read_queue);
+ writes_queued = !bio_list_empty(&pd->iosched.write_queue);
+ spin_unlock(&pd->iosched.lock);
+
+ if (!reads_queued && !writes_queued)
+ break;
+
+ if (pd->iosched.writing) {
+ int need_write_seek = 1;
+ spin_lock(&pd->iosched.lock);
+ bio = bio_list_peek(&pd->iosched.write_queue);
+ spin_unlock(&pd->iosched.lock);
+ if (bio && (bio->bi_iter.bi_sector ==
+ pd->iosched.last_write))
+ need_write_seek = 0;
+ if (need_write_seek && reads_queued) {
+ if (atomic_read(&pd->cdrw.pending_bios) > 0) {
+ pkt_dbg(2, pd, "write, waiting\n");
+ break;
+ }
+ pkt_flush_cache(pd);
+ pd->iosched.writing = 0;
+ }
+ } else {
+ if (!reads_queued && writes_queued) {
+ if (atomic_read(&pd->cdrw.pending_bios) > 0) {
+ pkt_dbg(2, pd, "read, waiting\n");
+ break;
+ }
+ pd->iosched.writing = 1;
+ }
+ }
+
+ spin_lock(&pd->iosched.lock);
+ if (pd->iosched.writing)
+ bio = bio_list_pop(&pd->iosched.write_queue);
+ else
+ bio = bio_list_pop(&pd->iosched.read_queue);
+ spin_unlock(&pd->iosched.lock);
+
+ if (!bio)
+ continue;
+
+ if (bio_data_dir(bio) == READ)
+ pd->iosched.successive_reads +=
+ bio->bi_iter.bi_size >> 10;
+ else {
+ pd->iosched.successive_reads = 0;
+ pd->iosched.last_write = bio_end_sector(bio);
+ }
+ if (pd->iosched.successive_reads >= HI_SPEED_SWITCH) {
+ if (pd->read_speed == pd->write_speed) {
+ pd->read_speed = MAX_SPEED;
+ pkt_set_speed(pd, pd->write_speed, pd->read_speed);
+ }
+ } else {
+ if (pd->read_speed != pd->write_speed) {
+ pd->read_speed = pd->write_speed;
+ pkt_set_speed(pd, pd->write_speed, pd->read_speed);
+ }
+ }
+
+ atomic_inc(&pd->cdrw.pending_bios);
+ submit_bio_noacct(bio);
+ }
+}
+
+/*
+ * Special care is needed if the underlying block device has a small
+ * max_phys_segments value.
+ */
+static int pkt_set_segment_merging(struct pktcdvd_device *pd, struct request_queue *q)
+{
+ if ((pd->settings.size << 9) / CD_FRAMESIZE
+ <= queue_max_segments(q)) {
+ /*
+ * The cdrom device can handle one segment/frame
+ */
+ clear_bit(PACKET_MERGE_SEGS, &pd->flags);
+ return 0;
+ } else if ((pd->settings.size << 9) / PAGE_SIZE
+ <= queue_max_segments(q)) {
+ /*
+ * We can handle this case at the expense of some extra memory
+ * copies during write operations
+ */
+ set_bit(PACKET_MERGE_SEGS, &pd->flags);
+ return 0;
+ } else {
+ pkt_err(pd, "cdrom max_phys_segments too small\n");
+ return -EIO;
+ }
+}
+
+static void pkt_end_io_read(struct bio *bio)
+{
+ struct packet_data *pkt = bio->bi_private;
+ struct pktcdvd_device *pd = pkt->pd;
+ BUG_ON(!pd);
+
+ pkt_dbg(2, pd, "bio=%p sec0=%llx sec=%llx err=%d\n",
+ bio, (unsigned long long)pkt->sector,
+ (unsigned long long)bio->bi_iter.bi_sector, bio->bi_status);
+
+ if (bio->bi_status)
+ atomic_inc(&pkt->io_errors);
+ bio_uninit(bio);
+ if (atomic_dec_and_test(&pkt->io_wait)) {
+ atomic_inc(&pkt->run_sm);
+ wake_up(&pd->wqueue);
+ }
+ pkt_bio_finished(pd);
+}
+
+static void pkt_end_io_packet_write(struct bio *bio)
+{
+ struct packet_data *pkt = bio->bi_private;
+ struct pktcdvd_device *pd = pkt->pd;
+ BUG_ON(!pd);
+
+ pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, bio->bi_status);
+
+ pd->stats.pkt_ended++;
+
+ bio_uninit(bio);
+ pkt_bio_finished(pd);
+ atomic_dec(&pkt->io_wait);
+ atomic_inc(&pkt->run_sm);
+ wake_up(&pd->wqueue);
+}
+
+/*
+ * Schedule reads for the holes in a packet
+ */
+static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
+{
+ int frames_read = 0;
+ struct bio *bio;
+ int f;
+ char written[PACKET_MAX_SIZE];
+
+ BUG_ON(bio_list_empty(&pkt->orig_bios));
+
+ atomic_set(&pkt->io_wait, 0);
+ atomic_set(&pkt->io_errors, 0);
+
+ /*
+ * Figure out which frames we need to read before we can write.
+ */
+ memset(written, 0, sizeof(written));
+ spin_lock(&pkt->lock);
+ bio_list_for_each(bio, &pkt->orig_bios) {
+ int first_frame = (bio->bi_iter.bi_sector - pkt->sector) /
+ (CD_FRAMESIZE >> 9);
+ int num_frames = bio->bi_iter.bi_size / CD_FRAMESIZE;
+ pd->stats.secs_w += num_frames * (CD_FRAMESIZE >> 9);
+ BUG_ON(first_frame < 0);
+ BUG_ON(first_frame + num_frames > pkt->frames);
+ for (f = first_frame; f < first_frame + num_frames; f++)
+ written[f] = 1;
+ }
+ spin_unlock(&pkt->lock);
+
+ if (pkt->cache_valid) {
+ pkt_dbg(2, pd, "zone %llx cached\n",
+ (unsigned long long)pkt->sector);
+ goto out_account;
+ }
+
+ /*
+ * Schedule reads for missing parts of the packet.
+ */
+ for (f = 0; f < pkt->frames; f++) {
+ int p, offset;
+
+ if (written[f])
+ continue;
+
+ bio = pkt->r_bios[f];
+ bio_init(bio, pd->bdev, bio->bi_inline_vecs, 1, REQ_OP_READ);
+ bio->bi_iter.bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9);
+ bio->bi_end_io = pkt_end_io_read;
+ bio->bi_private = pkt;
+
+ p = (f * CD_FRAMESIZE) / PAGE_SIZE;
+ offset = (f * CD_FRAMESIZE) % PAGE_SIZE;
+ pkt_dbg(2, pd, "Adding frame %d, page:%p offs:%d\n",
+ f, pkt->pages[p], offset);
+ if (!bio_add_page(bio, pkt->pages[p], CD_FRAMESIZE, offset))
+ BUG();
+
+ atomic_inc(&pkt->io_wait);
+ pkt_queue_bio(pd, bio);
+ frames_read++;
+ }
+
+out_account:
+ pkt_dbg(2, pd, "need %d frames for zone %llx\n",
+ frames_read, (unsigned long long)pkt->sector);
+ pd->stats.pkt_started++;
+ pd->stats.secs_rg += frames_read * (CD_FRAMESIZE >> 9);
+}
+
+/*
+ * Find a packet matching zone, or the least recently used packet if
+ * there is no match.
+ */
+static struct packet_data *pkt_get_packet_data(struct pktcdvd_device *pd, int zone)
+{
+ struct packet_data *pkt;
+
+ list_for_each_entry(pkt, &pd->cdrw.pkt_free_list, list) {
+ if (pkt->sector == zone || pkt->list.next == &pd->cdrw.pkt_free_list) {
+ list_del_init(&pkt->list);
+ if (pkt->sector != zone)
+ pkt->cache_valid = 0;
+ return pkt;
+ }
+ }
+ BUG();
+ return NULL;
+}
+
+static void pkt_put_packet_data(struct pktcdvd_device *pd, struct packet_data *pkt)
+{
+ if (pkt->cache_valid) {
+ list_add(&pkt->list, &pd->cdrw.pkt_free_list);
+ } else {
+ list_add_tail(&pkt->list, &pd->cdrw.pkt_free_list);
+ }
+}
+
+static inline void pkt_set_state(struct packet_data *pkt, enum packet_data_state state)
+{
+#if PACKET_DEBUG > 1
+ static const char *state_name[] = {
+ "IDLE", "WAITING", "READ_WAIT", "WRITE_WAIT", "RECOVERY", "FINISHED"
+ };
+ enum packet_data_state old_state = pkt->state;
+ pkt_dbg(2, pd, "pkt %2d : s=%6llx %s -> %s\n",
+ pkt->id, (unsigned long long)pkt->sector,
+ state_name[old_state], state_name[state]);
+#endif
+ pkt->state = state;
+}
+
+/*
+ * Scan the work queue to see if we can start a new packet.
+ * returns non-zero if any work was done.
+ */
+static int pkt_handle_queue(struct pktcdvd_device *pd)
+{
+ struct packet_data *pkt, *p;
+ struct bio *bio = NULL;
+ sector_t zone = 0; /* Suppress gcc warning */
+ struct pkt_rb_node *node, *first_node;
+ struct rb_node *n;
+
+ atomic_set(&pd->scan_queue, 0);
+
+ if (list_empty(&pd->cdrw.pkt_free_list)) {
+ pkt_dbg(2, pd, "no pkt\n");
+ return 0;
+ }
+
+ /*
+ * Try to find a zone we are not already working on.
+ */
+ spin_lock(&pd->lock);
+ first_node = pkt_rbtree_find(pd, pd->current_sector);
+ if (!first_node) {
+ n = rb_first(&pd->bio_queue);
+ if (n)
+ first_node = rb_entry(n, struct pkt_rb_node, rb_node);
+ }
+ node = first_node;
+ while (node) {
+ bio = node->bio;
+ zone = get_zone(bio->bi_iter.bi_sector, pd);
+ list_for_each_entry(p, &pd->cdrw.pkt_active_list, list) {
+ if (p->sector == zone) {
+ bio = NULL;
+ goto try_next_bio;
+ }
+ }
+ break;
+try_next_bio:
+ node = pkt_rbtree_next(node);
+ if (!node) {
+ n = rb_first(&pd->bio_queue);
+ if (n)
+ node = rb_entry(n, struct pkt_rb_node, rb_node);
+ }
+ if (node == first_node)
+ node = NULL;
+ }
+ spin_unlock(&pd->lock);
+ if (!bio) {
+ pkt_dbg(2, pd, "no bio\n");
+ return 0;
+ }
+
+ pkt = pkt_get_packet_data(pd, zone);
+
+ pd->current_sector = zone + pd->settings.size;
+ pkt->sector = zone;
+ BUG_ON(pkt->frames != pd->settings.size >> 2);
+ pkt->write_size = 0;
+
+ /*
+ * Scan work queue for bios in the same zone and link them
+ * to this packet.
+ */
+ spin_lock(&pd->lock);
+ pkt_dbg(2, pd, "looking for zone %llx\n", (unsigned long long)zone);
+ while ((node = pkt_rbtree_find(pd, zone)) != NULL) {
+ bio = node->bio;
+ pkt_dbg(2, pd, "found zone=%llx\n", (unsigned long long)
+ get_zone(bio->bi_iter.bi_sector, pd));
+ if (get_zone(bio->bi_iter.bi_sector, pd) != zone)
+ break;
+ pkt_rbtree_erase(pd, node);
+ spin_lock(&pkt->lock);
+ bio_list_add(&pkt->orig_bios, bio);
+ pkt->write_size += bio->bi_iter.bi_size / CD_FRAMESIZE;
+ spin_unlock(&pkt->lock);
+ }
+ /* check write congestion marks, and if bio_queue_size is
+ * below, wake up any waiters
+ */
+ if (pd->congested &&
+ pd->bio_queue_size <= pd->write_congestion_off) {
+ pd->congested = false;
+ wake_up_var(&pd->congested);
+ }
+ spin_unlock(&pd->lock);
+
+ pkt->sleep_time = max(PACKET_WAIT_TIME, 1);
+ pkt_set_state(pkt, PACKET_WAITING_STATE);
+ atomic_set(&pkt->run_sm, 1);
+
+ spin_lock(&pd->cdrw.active_list_lock);
+ list_add(&pkt->list, &pd->cdrw.pkt_active_list);
+ spin_unlock(&pd->cdrw.active_list_lock);
+
+ return 1;
+}
+
+/**
+ * bio_list_copy_data - copy contents of data buffers from one chain of bios to
+ * another
+ * @src: source bio list
+ * @dst: destination bio list
+ *
+ * Stops when it reaches the end of either the @src list or @dst list - that is,
+ * copies min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of
+ * bios).
+ */
+static void bio_list_copy_data(struct bio *dst, struct bio *src)
+{
+ struct bvec_iter src_iter = src->bi_iter;
+ struct bvec_iter dst_iter = dst->bi_iter;
+
+ while (1) {
+ if (!src_iter.bi_size) {
+ src = src->bi_next;
+ if (!src)
+ break;
+
+ src_iter = src->bi_iter;
+ }
+
+ if (!dst_iter.bi_size) {
+ dst = dst->bi_next;
+ if (!dst)
+ break;
+
+ dst_iter = dst->bi_iter;
+ }
+
+ bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
+ }
+}
+
+/*
+ * Assemble a bio to write one packet and queue the bio for processing
+ * by the underlying block device.
+ */
+static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
+{
+ int f;
+
+ bio_init(pkt->w_bio, pd->bdev, pkt->w_bio->bi_inline_vecs, pkt->frames,
+ REQ_OP_WRITE);
+ pkt->w_bio->bi_iter.bi_sector = pkt->sector;
+ pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
+ pkt->w_bio->bi_private = pkt;
+
+ /* XXX: locking? */
+ for (f = 0; f < pkt->frames; f++) {
+ struct page *page = pkt->pages[(f * CD_FRAMESIZE) / PAGE_SIZE];
+ unsigned offset = (f * CD_FRAMESIZE) % PAGE_SIZE;
+
+ if (!bio_add_page(pkt->w_bio, page, CD_FRAMESIZE, offset))
+ BUG();
+ }
+ pkt_dbg(2, pd, "vcnt=%d\n", pkt->w_bio->bi_vcnt);
+
+ /*
+ * Fill-in bvec with data from orig_bios.
+ */
+ spin_lock(&pkt->lock);
+ bio_list_copy_data(pkt->w_bio, pkt->orig_bios.head);
+
+ pkt_set_state(pkt, PACKET_WRITE_WAIT_STATE);
+ spin_unlock(&pkt->lock);
+
+ pkt_dbg(2, pd, "Writing %d frames for zone %llx\n",
+ pkt->write_size, (unsigned long long)pkt->sector);
+
+ if (test_bit(PACKET_MERGE_SEGS, &pd->flags) || (pkt->write_size < pkt->frames))
+ pkt->cache_valid = 1;
+ else
+ pkt->cache_valid = 0;
+
+ /* Start the write request */
+ atomic_set(&pkt->io_wait, 1);
+ pkt_queue_bio(pd, pkt->w_bio);
+}
+
+static void pkt_finish_packet(struct packet_data *pkt, blk_status_t status)
+{
+ struct bio *bio;
+
+ if (status)
+ pkt->cache_valid = 0;
+
+ /* Finish all bios corresponding to this packet */
+ while ((bio = bio_list_pop(&pkt->orig_bios))) {
+ bio->bi_status = status;
+ bio_endio(bio);
+ }
+}
+
+static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data *pkt)
+{
+ pkt_dbg(2, pd, "pkt %d\n", pkt->id);
+
+ for (;;) {
+ switch (pkt->state) {
+ case PACKET_WAITING_STATE:
+ if ((pkt->write_size < pkt->frames) && (pkt->sleep_time > 0))
+ return;
+
+ pkt->sleep_time = 0;
+ pkt_gather_data(pd, pkt);
+ pkt_set_state(pkt, PACKET_READ_WAIT_STATE);
+ break;
+
+ case PACKET_READ_WAIT_STATE:
+ if (atomic_read(&pkt->io_wait) > 0)
+ return;
+
+ if (atomic_read(&pkt->io_errors) > 0) {
+ pkt_set_state(pkt, PACKET_RECOVERY_STATE);
+ } else {
+ pkt_start_write(pd, pkt);
+ }
+ break;
+
+ case PACKET_WRITE_WAIT_STATE:
+ if (atomic_read(&pkt->io_wait) > 0)
+ return;
+
+ if (!pkt->w_bio->bi_status) {
+ pkt_set_state(pkt, PACKET_FINISHED_STATE);
+ } else {
+ pkt_set_state(pkt, PACKET_RECOVERY_STATE);
+ }
+ break;
+
+ case PACKET_RECOVERY_STATE:
+ pkt_dbg(2, pd, "No recovery possible\n");
+ pkt_set_state(pkt, PACKET_FINISHED_STATE);
+ break;
+
+ case PACKET_FINISHED_STATE:
+ pkt_finish_packet(pkt, pkt->w_bio->bi_status);
+ return;
+
+ default:
+ BUG();
+ break;
+ }
+ }
+}
+
+static void pkt_handle_packets(struct pktcdvd_device *pd)
+{
+ struct packet_data *pkt, *next;
+
+ /*
+ * Run state machine for active packets
+ */
+ list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+ if (atomic_read(&pkt->run_sm) > 0) {
+ atomic_set(&pkt->run_sm, 0);
+ pkt_run_state_machine(pd, pkt);
+ }
+ }
+
+ /*
+ * Move no longer active packets to the free list
+ */
+ spin_lock(&pd->cdrw.active_list_lock);
+ list_for_each_entry_safe(pkt, next, &pd->cdrw.pkt_active_list, list) {
+ if (pkt->state == PACKET_FINISHED_STATE) {
+ list_del(&pkt->list);
+ pkt_put_packet_data(pd, pkt);
+ pkt_set_state(pkt, PACKET_IDLE_STATE);
+ atomic_set(&pd->scan_queue, 1);
+ }
+ }
+ spin_unlock(&pd->cdrw.active_list_lock);
+}
+
+static void pkt_count_states(struct pktcdvd_device *pd, int *states)
+{
+ struct packet_data *pkt;
+ int i;
+
+ for (i = 0; i < PACKET_NUM_STATES; i++)
+ states[i] = 0;
+
+ spin_lock(&pd->cdrw.active_list_lock);
+ list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+ states[pkt->state]++;
+ }
+ spin_unlock(&pd->cdrw.active_list_lock);
+}
+
+/*
+ * kcdrwd is woken up when writes have been queued for one of our
+ * registered devices
+ */
+static int kcdrwd(void *foobar)
+{
+ struct pktcdvd_device *pd = foobar;
+ struct packet_data *pkt;
+ long min_sleep_time, residue;
+
+ set_user_nice(current, MIN_NICE);
+ set_freezable();
+
+ for (;;) {
+ DECLARE_WAITQUEUE(wait, current);
+
+ /*
+ * Wait until there is something to do
+ */
+ add_wait_queue(&pd->wqueue, &wait);
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ /* Check if we need to run pkt_handle_queue */
+ if (atomic_read(&pd->scan_queue) > 0)
+ goto work_to_do;
+
+ /* Check if we need to run the state machine for some packet */
+ list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+ if (atomic_read(&pkt->run_sm) > 0)
+ goto work_to_do;
+ }
+
+ /* Check if we need to process the iosched queues */
+ if (atomic_read(&pd->iosched.attention) != 0)
+ goto work_to_do;
+
+ /* Otherwise, go to sleep */
+ if (PACKET_DEBUG > 1) {
+ int states[PACKET_NUM_STATES];
+ pkt_count_states(pd, states);
+ pkt_dbg(2, pd, "i:%d ow:%d rw:%d ww:%d rec:%d fin:%d\n",
+ states[0], states[1], states[2],
+ states[3], states[4], states[5]);
+ }
+
+ min_sleep_time = MAX_SCHEDULE_TIMEOUT;
+ list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+ if (pkt->sleep_time && pkt->sleep_time < min_sleep_time)
+ min_sleep_time = pkt->sleep_time;
+ }
+
+ pkt_dbg(2, pd, "sleeping\n");
+ residue = schedule_timeout(min_sleep_time);
+ pkt_dbg(2, pd, "wake up\n");
+
+ /* make swsusp happy with our thread */
+ try_to_freeze();
+
+ list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+ if (!pkt->sleep_time)
+ continue;
+ pkt->sleep_time -= min_sleep_time - residue;
+ if (pkt->sleep_time <= 0) {
+ pkt->sleep_time = 0;
+ atomic_inc(&pkt->run_sm);
+ }
+ }
+
+ if (kthread_should_stop())
+ break;
+ }
+work_to_do:
+ set_current_state(TASK_RUNNING);
+ remove_wait_queue(&pd->wqueue, &wait);
+
+ if (kthread_should_stop())
+ break;
+
+ /*
+ * if pkt_handle_queue returns true, we can queue
+ * another request.
+ */
+ while (pkt_handle_queue(pd))
+ ;
+
+ /*
+ * Handle packet state machine
+ */
+ pkt_handle_packets(pd);
+
+ /*
+ * Handle iosched queues
+ */
+ pkt_iosched_process_queue(pd);
+ }
+
+ return 0;
+}
+
+static void pkt_print_settings(struct pktcdvd_device *pd)
+{
+ pkt_info(pd, "%s packets, %u blocks, Mode-%c disc\n",
+ pd->settings.fp ? "Fixed" : "Variable",
+ pd->settings.size >> 2,
+ pd->settings.block_mode == 8 ? '1' : '2');
+}
+
+static int pkt_mode_sense(struct pktcdvd_device *pd, struct packet_command *cgc, int page_code, int page_control)
+{
+ memset(cgc->cmd, 0, sizeof(cgc->cmd));
+
+ cgc->cmd[0] = GPCMD_MODE_SENSE_10;
+ cgc->cmd[2] = page_code | (page_control << 6);
+ cgc->cmd[7] = cgc->buflen >> 8;
+ cgc->cmd[8] = cgc->buflen & 0xff;
+ cgc->data_direction = CGC_DATA_READ;
+ return pkt_generic_packet(pd, cgc);
+}
+
+static int pkt_mode_select(struct pktcdvd_device *pd, struct packet_command *cgc)
+{
+ memset(cgc->cmd, 0, sizeof(cgc->cmd));
+ memset(cgc->buffer, 0, 2);
+ cgc->cmd[0] = GPCMD_MODE_SELECT_10;
+ cgc->cmd[1] = 0x10; /* PF */
+ cgc->cmd[7] = cgc->buflen >> 8;
+ cgc->cmd[8] = cgc->buflen & 0xff;
+ cgc->data_direction = CGC_DATA_WRITE;
+ return pkt_generic_packet(pd, cgc);
+}
+
+static int pkt_get_disc_info(struct pktcdvd_device *pd, disc_information *di)
+{
+ struct packet_command cgc;
+ int ret;
+
+ /* set up command and get the disc info */
+ init_cdrom_command(&cgc, di, sizeof(*di), CGC_DATA_READ);
+ cgc.cmd[0] = GPCMD_READ_DISC_INFO;
+ cgc.cmd[8] = cgc.buflen = 2;
+ cgc.quiet = 1;
+
+ ret = pkt_generic_packet(pd, &cgc);
+ if (ret)
+ return ret;
+
+ /* not all drives have the same disc_info length, so requeue
+ * packet with the length the drive tells us it can supply
+ */
+ cgc.buflen = be16_to_cpu(di->disc_information_length) +
+ sizeof(di->disc_information_length);
+
+ if (cgc.buflen > sizeof(disc_information))
+ cgc.buflen = sizeof(disc_information);
+
+ cgc.cmd[8] = cgc.buflen;
+ return pkt_generic_packet(pd, &cgc);
+}
+
+static int pkt_get_track_info(struct pktcdvd_device *pd, __u16 track, __u8 type, track_information *ti)
+{
+ struct packet_command cgc;
+ int ret;
+
+ init_cdrom_command(&cgc, ti, 8, CGC_DATA_READ);
+ cgc.cmd[0] = GPCMD_READ_TRACK_RZONE_INFO;
+ cgc.cmd[1] = type & 3;
+ cgc.cmd[4] = (track & 0xff00) >> 8;
+ cgc.cmd[5] = track & 0xff;
+ cgc.cmd[8] = 8;
+ cgc.quiet = 1;
+
+ ret = pkt_generic_packet(pd, &cgc);
+ if (ret)
+ return ret;
+
+ cgc.buflen = be16_to_cpu(ti->track_information_length) +
+ sizeof(ti->track_information_length);
+
+ if (cgc.buflen > sizeof(track_information))
+ cgc.buflen = sizeof(track_information);
+
+ cgc.cmd[8] = cgc.buflen;
+ return pkt_generic_packet(pd, &cgc);
+}
+
+static noinline_for_stack int pkt_get_last_written(struct pktcdvd_device *pd,
+ long *last_written)
+{
+ disc_information di;
+ track_information ti;
+ __u32 last_track;
+ int ret;
+
+ ret = pkt_get_disc_info(pd, &di);
+ if (ret)
+ return ret;
+
+ last_track = (di.last_track_msb << 8) | di.last_track_lsb;
+ ret = pkt_get_track_info(pd, last_track, 1, &ti);
+ if (ret)
+ return ret;
+
+ /* if this track is blank, try the previous. */
+ if (ti.blank) {
+ last_track--;
+ ret = pkt_get_track_info(pd, last_track, 1, &ti);
+ if (ret)
+ return ret;
+ }
+
+ /* if last recorded field is valid, return it. */
+ if (ti.lra_v) {
+ *last_written = be32_to_cpu(ti.last_rec_address);
+ } else {
+ /* make it up instead */
+ *last_written = be32_to_cpu(ti.track_start) +
+ be32_to_cpu(ti.track_size);
+ if (ti.free_blocks)
+ *last_written -= (be32_to_cpu(ti.free_blocks) + 7);
+ }
+ return 0;
+}
+
+/*
+ * write mode select package based on pd->settings
+ */
+static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd)
+{
+ struct packet_command cgc;
+ struct scsi_sense_hdr sshdr;
+ write_param_page *wp;
+ char buffer[128];
+ int ret, size;
+
+ /* doesn't apply to DVD+RW or DVD-RAM */
+ if ((pd->mmc3_profile == 0x1a) || (pd->mmc3_profile == 0x12))
+ return 0;
+
+ memset(buffer, 0, sizeof(buffer));
+ init_cdrom_command(&cgc, buffer, sizeof(*wp), CGC_DATA_READ);
+ cgc.sshdr = &sshdr;
+ ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0);
+ if (ret) {
+ pkt_dump_sense(pd, &cgc);
+ return ret;
+ }
+
+ size = 2 + ((buffer[0] << 8) | (buffer[1] & 0xff));
+ pd->mode_offset = (buffer[6] << 8) | (buffer[7] & 0xff);
+ if (size > sizeof(buffer))
+ size = sizeof(buffer);
+
+ /*
+ * now get it all
+ */
+ init_cdrom_command(&cgc, buffer, size, CGC_DATA_READ);
+ cgc.sshdr = &sshdr;
+ ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0);
+ if (ret) {
+ pkt_dump_sense(pd, &cgc);
+ return ret;
+ }
+
+ /*
+ * write page is offset header + block descriptor length
+ */
+ wp = (write_param_page *) &buffer[sizeof(struct mode_page_header) + pd->mode_offset];
+
+ wp->fp = pd->settings.fp;
+ wp->track_mode = pd->settings.track_mode;
+ wp->write_type = pd->settings.write_type;
+ wp->data_block_type = pd->settings.block_mode;
+
+ wp->multi_session = 0;
+
+#ifdef PACKET_USE_LS
+ wp->link_size = 7;
+ wp->ls_v = 1;
+#endif
+
+ if (wp->data_block_type == PACKET_BLOCK_MODE1) {
+ wp->session_format = 0;
+ wp->subhdr2 = 0x20;
+ } else if (wp->data_block_type == PACKET_BLOCK_MODE2) {
+ wp->session_format = 0x20;
+ wp->subhdr2 = 8;
+#if 0
+ wp->mcn[0] = 0x80;
+ memcpy(&wp->mcn[1], PACKET_MCN, sizeof(wp->mcn) - 1);
+#endif
+ } else {
+ /*
+ * paranoia
+ */
+ pkt_err(pd, "write mode wrong %d\n", wp->data_block_type);
+ return 1;
+ }
+ wp->packet_size = cpu_to_be32(pd->settings.size >> 2);
+
+ cgc.buflen = cgc.cmd[8] = size;
+ ret = pkt_mode_select(pd, &cgc);
+ if (ret) {
+ pkt_dump_sense(pd, &cgc);
+ return ret;
+ }
+
+ pkt_print_settings(pd);
+ return 0;
+}
+
+/*
+ * 1 -- we can write to this track, 0 -- we can't
+ */
+static int pkt_writable_track(struct pktcdvd_device *pd, track_information *ti)
+{
+ switch (pd->mmc3_profile) {
+ case 0x1a: /* DVD+RW */
+ case 0x12: /* DVD-RAM */
+ /* The track is always writable on DVD+RW/DVD-RAM */
+ return 1;
+ default:
+ break;
+ }
+
+ if (!ti->packet || !ti->fp)
+ return 0;
+
+ /*
+ * "good" settings as per Mt Fuji.
+ */
+ if (ti->rt == 0 && ti->blank == 0)
+ return 1;
+
+ if (ti->rt == 0 && ti->blank == 1)
+ return 1;
+
+ if (ti->rt == 1 && ti->blank == 0)
+ return 1;
+
+ pkt_err(pd, "bad state %d-%d-%d\n", ti->rt, ti->blank, ti->packet);
+ return 0;
+}
+
+/*
+ * 1 -- we can write to this disc, 0 -- we can't
+ */
+static int pkt_writable_disc(struct pktcdvd_device *pd, disc_information *di)
+{
+ switch (pd->mmc3_profile) {
+ case 0x0a: /* CD-RW */
+ case 0xffff: /* MMC3 not supported */
+ break;
+ case 0x1a: /* DVD+RW */
+ case 0x13: /* DVD-RW */
+ case 0x12: /* DVD-RAM */
+ return 1;
+ default:
+ pkt_dbg(2, pd, "Wrong disc profile (%x)\n",
+ pd->mmc3_profile);
+ return 0;
+ }
+
+ /*
+ * for disc type 0xff we should probably reserve a new track.
+ * but i'm not sure, should we leave this to user apps? probably.
+ */
+ if (di->disc_type == 0xff) {
+ pkt_notice(pd, "unknown disc - no track?\n");
+ return 0;
+ }
+
+ if (di->disc_type != 0x20 && di->disc_type != 0) {
+ pkt_err(pd, "wrong disc type (%x)\n", di->disc_type);
+ return 0;
+ }
+
+ if (di->erasable == 0) {
+ pkt_notice(pd, "disc not erasable\n");
+ return 0;
+ }
+
+ if (di->border_status == PACKET_SESSION_RESERVED) {
+ pkt_err(pd, "can't write to last track (reserved)\n");
+ return 0;
+ }
+
+ return 1;
+}
+
+static noinline_for_stack int pkt_probe_settings(struct pktcdvd_device *pd)
+{
+ struct packet_command cgc;
+ unsigned char buf[12];
+ disc_information di;
+ track_information ti;
+ int ret, track;
+
+ init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ);
+ cgc.cmd[0] = GPCMD_GET_CONFIGURATION;
+ cgc.cmd[8] = 8;
+ ret = pkt_generic_packet(pd, &cgc);
+ pd->mmc3_profile = ret ? 0xffff : buf[6] << 8 | buf[7];
+
+ memset(&di, 0, sizeof(disc_information));
+ memset(&ti, 0, sizeof(track_information));
+
+ ret = pkt_get_disc_info(pd, &di);
+ if (ret) {
+ pkt_err(pd, "failed get_disc\n");
+ return ret;
+ }
+
+ if (!pkt_writable_disc(pd, &di))
+ return -EROFS;
+
+ pd->type = di.erasable ? PACKET_CDRW : PACKET_CDR;
+
+ track = 1; /* (di.last_track_msb << 8) | di.last_track_lsb; */
+ ret = pkt_get_track_info(pd, track, 1, &ti);
+ if (ret) {
+ pkt_err(pd, "failed get_track\n");
+ return ret;
+ }
+
+ if (!pkt_writable_track(pd, &ti)) {
+ pkt_err(pd, "can't write to this track\n");
+ return -EROFS;
+ }
+
+ /*
+ * we keep packet size in 512 byte units, makes it easier to
+ * deal with request calculations.
+ */
+ pd->settings.size = be32_to_cpu(ti.fixed_packet_size) << 2;
+ if (pd->settings.size == 0) {
+ pkt_notice(pd, "detected zero packet size!\n");
+ return -ENXIO;
+ }
+ if (pd->settings.size > PACKET_MAX_SECTORS) {
+ pkt_err(pd, "packet size is too big\n");
+ return -EROFS;
+ }
+ pd->settings.fp = ti.fp;
+ pd->offset = (be32_to_cpu(ti.track_start) << 2) & (pd->settings.size - 1);
+
+ if (ti.nwa_v) {
+ pd->nwa = be32_to_cpu(ti.next_writable);
+ set_bit(PACKET_NWA_VALID, &pd->flags);
+ }
+
+ /*
+ * in theory we could use lra on -RW media as well and just zero
+ * blocks that haven't been written yet, but in practice that
+ * is just a no-go. we'll use that for -R, naturally.
+ */
+ if (ti.lra_v) {
+ pd->lra = be32_to_cpu(ti.last_rec_address);
+ set_bit(PACKET_LRA_VALID, &pd->flags);
+ } else {
+ pd->lra = 0xffffffff;
+ set_bit(PACKET_LRA_VALID, &pd->flags);
+ }
+
+ /*
+ * fine for now
+ */
+ pd->settings.link_loss = 7;
+ pd->settings.write_type = 0; /* packet */
+ pd->settings.track_mode = ti.track_mode;
+
+ /*
+ * mode1 or mode2 disc
+ */
+ switch (ti.data_mode) {
+ case PACKET_MODE1:
+ pd->settings.block_mode = PACKET_BLOCK_MODE1;
+ break;
+ case PACKET_MODE2:
+ pd->settings.block_mode = PACKET_BLOCK_MODE2;
+ break;
+ default:
+ pkt_err(pd, "unknown data mode\n");
+ return -EROFS;
+ }
+ return 0;
+}
+
+/*
+ * enable/disable write caching on drive
+ */
+static noinline_for_stack int pkt_write_caching(struct pktcdvd_device *pd,
+ int set)
+{
+ struct packet_command cgc;
+ struct scsi_sense_hdr sshdr;
+ unsigned char buf[64];
+ int ret;
+
+ init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ);
+ cgc.sshdr = &sshdr;
+ cgc.buflen = pd->mode_offset + 12;
+
+ /*
+ * caching mode page might not be there, so quiet this command
+ */
+ cgc.quiet = 1;
+
+ ret = pkt_mode_sense(pd, &cgc, GPMODE_WCACHING_PAGE, 0);
+ if (ret)
+ return ret;
+
+ buf[pd->mode_offset + 10] |= (!!set << 2);
+
+ cgc.buflen = cgc.cmd[8] = 2 + ((buf[0] << 8) | (buf[1] & 0xff));
+ ret = pkt_mode_select(pd, &cgc);
+ if (ret) {
+ pkt_err(pd, "write caching control failed\n");
+ pkt_dump_sense(pd, &cgc);
+ } else if (!ret && set)
+ pkt_notice(pd, "enabled write caching\n");
+ return ret;
+}
+
+static int pkt_lock_door(struct pktcdvd_device *pd, int lockflag)
+{
+ struct packet_command cgc;
+
+ init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
+ cgc.cmd[0] = GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL;
+ cgc.cmd[4] = lockflag ? 1 : 0;
+ return pkt_generic_packet(pd, &cgc);
+}
+
+/*
+ * Returns drive maximum write speed
+ */
+static noinline_for_stack int pkt_get_max_speed(struct pktcdvd_device *pd,
+ unsigned *write_speed)
+{
+ struct packet_command cgc;
+ struct scsi_sense_hdr sshdr;
+ unsigned char buf[256+18];
+ unsigned char *cap_buf;
+ int ret, offset;
+
+ cap_buf = &buf[sizeof(struct mode_page_header) + pd->mode_offset];
+ init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_UNKNOWN);
+ cgc.sshdr = &sshdr;
+
+ ret = pkt_mode_sense(pd, &cgc, GPMODE_CAPABILITIES_PAGE, 0);
+ if (ret) {
+ cgc.buflen = pd->mode_offset + cap_buf[1] + 2 +
+ sizeof(struct mode_page_header);
+ ret = pkt_mode_sense(pd, &cgc, GPMODE_CAPABILITIES_PAGE, 0);
+ if (ret) {
+ pkt_dump_sense(pd, &cgc);
+ return ret;
+ }
+ }
+
+ offset = 20; /* Obsoleted field, used by older drives */
+ if (cap_buf[1] >= 28)
+ offset = 28; /* Current write speed selected */
+ if (cap_buf[1] >= 30) {
+ /* If the drive reports at least one "Logical Unit Write
+ * Speed Performance Descriptor Block", use the information
+ * in the first block. (contains the highest speed)
+ */
+ int num_spdb = (cap_buf[30] << 8) + cap_buf[31];
+ if (num_spdb > 0)
+ offset = 34;
+ }
+
+ *write_speed = (cap_buf[offset] << 8) | cap_buf[offset + 1];
+ return 0;
+}
+
+/* These tables from cdrecord - I don't have orange book */
+/* standard speed CD-RW (1-4x) */
+static char clv_to_speed[16] = {
+ /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */
+ 0, 2, 4, 6, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+/* high speed CD-RW (-10x) */
+static char hs_clv_to_speed[16] = {
+ /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */
+ 0, 2, 4, 6, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+/* ultra high speed CD-RW */
+static char us_clv_to_speed[16] = {
+ /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */
+ 0, 2, 4, 8, 0, 0,16, 0,24,32,40,48, 0, 0, 0, 0
+};
+
+/*
+ * reads the maximum media speed from ATIP
+ */
+static noinline_for_stack int pkt_media_speed(struct pktcdvd_device *pd,
+ unsigned *speed)
+{
+ struct packet_command cgc;
+ struct scsi_sense_hdr sshdr;
+ unsigned char buf[64];
+ unsigned int size, st, sp;
+ int ret;
+
+ init_cdrom_command(&cgc, buf, 2, CGC_DATA_READ);
+ cgc.sshdr = &sshdr;
+ cgc.cmd[0] = GPCMD_READ_TOC_PMA_ATIP;
+ cgc.cmd[1] = 2;
+ cgc.cmd[2] = 4; /* READ ATIP */
+ cgc.cmd[8] = 2;
+ ret = pkt_generic_packet(pd, &cgc);
+ if (ret) {
+ pkt_dump_sense(pd, &cgc);
+ return ret;
+ }
+ size = ((unsigned int) buf[0]<<8) + buf[1] + 2;
+ if (size > sizeof(buf))
+ size = sizeof(buf);
+
+ init_cdrom_command(&cgc, buf, size, CGC_DATA_READ);
+ cgc.sshdr = &sshdr;
+ cgc.cmd[0] = GPCMD_READ_TOC_PMA_ATIP;
+ cgc.cmd[1] = 2;
+ cgc.cmd[2] = 4;
+ cgc.cmd[8] = size;
+ ret = pkt_generic_packet(pd, &cgc);
+ if (ret) {
+ pkt_dump_sense(pd, &cgc);
+ return ret;
+ }
+
+ if (!(buf[6] & 0x40)) {
+ pkt_notice(pd, "disc type is not CD-RW\n");
+ return 1;
+ }
+ if (!(buf[6] & 0x4)) {
+ pkt_notice(pd, "A1 values on media are not valid, maybe not CDRW?\n");
+ return 1;
+ }
+
+ st = (buf[6] >> 3) & 0x7; /* disc sub-type */
+
+ sp = buf[16] & 0xf; /* max speed from ATIP A1 field */
+
+ /* Info from cdrecord */
+ switch (st) {
+ case 0: /* standard speed */
+ *speed = clv_to_speed[sp];
+ break;
+ case 1: /* high speed */
+ *speed = hs_clv_to_speed[sp];
+ break;
+ case 2: /* ultra high speed */
+ *speed = us_clv_to_speed[sp];
+ break;
+ default:
+ pkt_notice(pd, "unknown disc sub-type %d\n", st);
+ return 1;
+ }
+ if (*speed) {
+ pkt_info(pd, "maximum media speed: %d\n", *speed);
+ return 0;
+ } else {
+ pkt_notice(pd, "unknown speed %d for sub-type %d\n", sp, st);
+ return 1;
+ }
+}
+
+static noinline_for_stack int pkt_perform_opc(struct pktcdvd_device *pd)
+{
+ struct packet_command cgc;
+ struct scsi_sense_hdr sshdr;
+ int ret;
+
+ pkt_dbg(2, pd, "Performing OPC\n");
+
+ init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
+ cgc.sshdr = &sshdr;
+ cgc.timeout = 60*HZ;
+ cgc.cmd[0] = GPCMD_SEND_OPC;
+ cgc.cmd[1] = 1;
+ ret = pkt_generic_packet(pd, &cgc);
+ if (ret)
+ pkt_dump_sense(pd, &cgc);
+ return ret;
+}
+
+static int pkt_open_write(struct pktcdvd_device *pd)
+{
+ int ret;
+ unsigned int write_speed, media_write_speed, read_speed;
+
+ ret = pkt_probe_settings(pd);
+ if (ret) {
+ pkt_dbg(2, pd, "failed probe\n");
+ return ret;
+ }
+
+ ret = pkt_set_write_settings(pd);
+ if (ret) {
+ pkt_dbg(1, pd, "failed saving write settings\n");
+ return -EIO;
+ }
+
+ pkt_write_caching(pd, USE_WCACHING);
+
+ ret = pkt_get_max_speed(pd, &write_speed);
+ if (ret)
+ write_speed = 16 * 177;
+ switch (pd->mmc3_profile) {
+ case 0x13: /* DVD-RW */
+ case 0x1a: /* DVD+RW */
+ case 0x12: /* DVD-RAM */
+ pkt_dbg(1, pd, "write speed %ukB/s\n", write_speed);
+ break;
+ default:
+ ret = pkt_media_speed(pd, &media_write_speed);
+ if (ret)
+ media_write_speed = 16;
+ write_speed = min(write_speed, media_write_speed * 177);
+ pkt_dbg(1, pd, "write speed %ux\n", write_speed / 176);
+ break;
+ }
+ read_speed = write_speed;
+
+ ret = pkt_set_speed(pd, write_speed, read_speed);
+ if (ret) {
+ pkt_dbg(1, pd, "couldn't set write speed\n");
+ return -EIO;
+ }
+ pd->write_speed = write_speed;
+ pd->read_speed = read_speed;
+
+ ret = pkt_perform_opc(pd);
+ if (ret) {
+ pkt_dbg(1, pd, "Optimum Power Calibration failed\n");
+ }
+
+ return 0;
+}
+
+/*
+ * called at open time.
+ */
+static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write)
+{
+ int ret;
+ long lba;
+ struct request_queue *q;
+ struct block_device *bdev;
+
+ /*
+ * We need to re-open the cdrom device without O_NONBLOCK to be able
+ * to read/write from/to it. It is already opened in O_NONBLOCK mode
+ * so open should not fail.
+ */
+ bdev = blkdev_get_by_dev(pd->bdev->bd_dev, FMODE_READ | FMODE_EXCL, pd);
+ if (IS_ERR(bdev)) {
+ ret = PTR_ERR(bdev);
+ goto out;
+ }
+
+ ret = pkt_get_last_written(pd, &lba);
+ if (ret) {
+ pkt_err(pd, "pkt_get_last_written failed\n");
+ goto out_putdev;
+ }
+
+ set_capacity(pd->disk, lba << 2);
+ set_capacity_and_notify(pd->bdev->bd_disk, lba << 2);
+
+ q = bdev_get_queue(pd->bdev);
+ if (write) {
+ ret = pkt_open_write(pd);
+ if (ret)
+ goto out_putdev;
+ /*
+ * Some CDRW drives can not handle writes larger than one packet,
+ * even if the size is a multiple of the packet size.
+ */
+ blk_queue_max_hw_sectors(q, pd->settings.size);
+ set_bit(PACKET_WRITABLE, &pd->flags);
+ } else {
+ pkt_set_speed(pd, MAX_SPEED, MAX_SPEED);
+ clear_bit(PACKET_WRITABLE, &pd->flags);
+ }
+
+ ret = pkt_set_segment_merging(pd, q);
+ if (ret)
+ goto out_putdev;
+
+ if (write) {
+ if (!pkt_grow_pktlist(pd, CONFIG_CDROM_PKTCDVD_BUFFERS)) {
+ pkt_err(pd, "not enough memory for buffers\n");
+ ret = -ENOMEM;
+ goto out_putdev;
+ }
+ pkt_info(pd, "%lukB available on disc\n", lba << 1);
+ }
+
+ return 0;
+
+out_putdev:
+ blkdev_put(bdev, FMODE_READ | FMODE_EXCL);
+out:
+ return ret;
+}
+
+/*
+ * called when the device is closed. makes sure that the device flushes
+ * the internal cache before we close.
+ */
+static void pkt_release_dev(struct pktcdvd_device *pd, int flush)
+{
+ if (flush && pkt_flush_cache(pd))
+ pkt_dbg(1, pd, "not flushing cache\n");
+
+ pkt_lock_door(pd, 0);
+
+ pkt_set_speed(pd, MAX_SPEED, MAX_SPEED);
+ blkdev_put(pd->bdev, FMODE_READ | FMODE_EXCL);
+
+ pkt_shrink_pktlist(pd);
+}
+
+static struct pktcdvd_device *pkt_find_dev_from_minor(unsigned int dev_minor)
+{
+ if (dev_minor >= MAX_WRITERS)
+ return NULL;
+
+ dev_minor = array_index_nospec(dev_minor, MAX_WRITERS);
+ return pkt_devs[dev_minor];
+}
+
+static int pkt_open(struct block_device *bdev, fmode_t mode)
+{
+ struct pktcdvd_device *pd = NULL;
+ int ret;
+
+ mutex_lock(&pktcdvd_mutex);
+ mutex_lock(&ctl_mutex);
+ pd = pkt_find_dev_from_minor(MINOR(bdev->bd_dev));
+ if (!pd) {
+ ret = -ENODEV;
+ goto out;
+ }
+ BUG_ON(pd->refcnt < 0);
+
+ pd->refcnt++;
+ if (pd->refcnt > 1) {
+ if ((mode & FMODE_WRITE) &&
+ !test_bit(PACKET_WRITABLE, &pd->flags)) {
+ ret = -EBUSY;
+ goto out_dec;
+ }
+ } else {
+ ret = pkt_open_dev(pd, mode & FMODE_WRITE);
+ if (ret)
+ goto out_dec;
+ /*
+ * needed here as well, since ext2 (among others) may change
+ * the blocksize at mount time
+ */
+ set_blocksize(bdev, CD_FRAMESIZE);
+ }
+
+ mutex_unlock(&ctl_mutex);
+ mutex_unlock(&pktcdvd_mutex);
+ return 0;
+
+out_dec:
+ pd->refcnt--;
+out:
+ mutex_unlock(&ctl_mutex);
+ mutex_unlock(&pktcdvd_mutex);
+ return ret;
+}
+
+static void pkt_close(struct gendisk *disk, fmode_t mode)
+{
+ struct pktcdvd_device *pd = disk->private_data;
+
+ mutex_lock(&pktcdvd_mutex);
+ mutex_lock(&ctl_mutex);
+ pd->refcnt--;
+ BUG_ON(pd->refcnt < 0);
+ if (pd->refcnt == 0) {
+ int flush = test_bit(PACKET_WRITABLE, &pd->flags);
+ pkt_release_dev(pd, flush);
+ }
+ mutex_unlock(&ctl_mutex);
+ mutex_unlock(&pktcdvd_mutex);
+}
+
+
+static void pkt_end_io_read_cloned(struct bio *bio)
+{
+ struct packet_stacked_data *psd = bio->bi_private;
+ struct pktcdvd_device *pd = psd->pd;
+
+ psd->bio->bi_status = bio->bi_status;
+ bio_put(bio);
+ bio_endio(psd->bio);
+ mempool_free(psd, &psd_pool);
+ pkt_bio_finished(pd);
+}
+
+static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
+{
+ struct bio *cloned_bio =
+ bio_alloc_clone(pd->bdev, bio, GFP_NOIO, &pkt_bio_set);
+ struct packet_stacked_data *psd = mempool_alloc(&psd_pool, GFP_NOIO);
+
+ psd->pd = pd;
+ psd->bio = bio;
+ cloned_bio->bi_private = psd;
+ cloned_bio->bi_end_io = pkt_end_io_read_cloned;
+ pd->stats.secs_r += bio_sectors(bio);
+ pkt_queue_bio(pd, cloned_bio);
+}
+
+static void pkt_make_request_write(struct request_queue *q, struct bio *bio)
+{
+ struct pktcdvd_device *pd = q->queuedata;
+ sector_t zone;
+ struct packet_data *pkt;
+ int was_empty, blocked_bio;
+ struct pkt_rb_node *node;
+
+ zone = get_zone(bio->bi_iter.bi_sector, pd);
+
+ /*
+ * If we find a matching packet in state WAITING or READ_WAIT, we can
+ * just append this bio to that packet.
+ */
+ spin_lock(&pd->cdrw.active_list_lock);
+ blocked_bio = 0;
+ list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+ if (pkt->sector == zone) {
+ spin_lock(&pkt->lock);
+ if ((pkt->state == PACKET_WAITING_STATE) ||
+ (pkt->state == PACKET_READ_WAIT_STATE)) {
+ bio_list_add(&pkt->orig_bios, bio);
+ pkt->write_size +=
+ bio->bi_iter.bi_size / CD_FRAMESIZE;
+ if ((pkt->write_size >= pkt->frames) &&
+ (pkt->state == PACKET_WAITING_STATE)) {
+ atomic_inc(&pkt->run_sm);
+ wake_up(&pd->wqueue);
+ }
+ spin_unlock(&pkt->lock);
+ spin_unlock(&pd->cdrw.active_list_lock);
+ return;
+ } else {
+ blocked_bio = 1;
+ }
+ spin_unlock(&pkt->lock);
+ }
+ }
+ spin_unlock(&pd->cdrw.active_list_lock);
+
+ /*
+ * Test if there is enough room left in the bio work queue
+ * (queue size >= congestion on mark).
+ * If not, wait till the work queue size is below the congestion off mark.
+ */
+ spin_lock(&pd->lock);
+ if (pd->write_congestion_on > 0
+ && pd->bio_queue_size >= pd->write_congestion_on) {
+ struct wait_bit_queue_entry wqe;
+
+ init_wait_var_entry(&wqe, &pd->congested, 0);
+ for (;;) {
+ prepare_to_wait_event(__var_waitqueue(&pd->congested),
+ &wqe.wq_entry,
+ TASK_UNINTERRUPTIBLE);
+ if (pd->bio_queue_size <= pd->write_congestion_off)
+ break;
+ pd->congested = true;
+ spin_unlock(&pd->lock);
+ schedule();
+ spin_lock(&pd->lock);
+ }
+ }
+ spin_unlock(&pd->lock);
+
+ /*
+ * No matching packet found. Store the bio in the work queue.
+ */
+ node = mempool_alloc(&pd->rb_pool, GFP_NOIO);
+ node->bio = bio;
+ spin_lock(&pd->lock);
+ BUG_ON(pd->bio_queue_size < 0);
+ was_empty = (pd->bio_queue_size == 0);
+ pkt_rbtree_insert(pd, node);
+ spin_unlock(&pd->lock);
+
+ /*
+ * Wake up the worker thread.
+ */
+ atomic_set(&pd->scan_queue, 1);
+ if (was_empty) {
+ /* This wake_up is required for correct operation */
+ wake_up(&pd->wqueue);
+ } else if (!list_empty(&pd->cdrw.pkt_free_list) && !blocked_bio) {
+ /*
+ * This wake up is not required for correct operation,
+ * but improves performance in some cases.
+ */
+ wake_up(&pd->wqueue);
+ }
+}
+
+static void pkt_submit_bio(struct bio *bio)
+{
+ struct pktcdvd_device *pd = bio->bi_bdev->bd_disk->queue->queuedata;
+ struct bio *split;
+
+ bio = bio_split_to_limits(bio);
+ if (!bio)
+ return;
+
+ pkt_dbg(2, pd, "start = %6llx stop = %6llx\n",
+ (unsigned long long)bio->bi_iter.bi_sector,
+ (unsigned long long)bio_end_sector(bio));
+
+ /*
+ * Clone READ bios so we can have our own bi_end_io callback.
+ */
+ if (bio_data_dir(bio) == READ) {
+ pkt_make_request_read(pd, bio);
+ return;
+ }
+
+ if (!test_bit(PACKET_WRITABLE, &pd->flags)) {
+ pkt_notice(pd, "WRITE for ro device (%llu)\n",
+ (unsigned long long)bio->bi_iter.bi_sector);
+ goto end_io;
+ }
+
+ if (!bio->bi_iter.bi_size || (bio->bi_iter.bi_size % CD_FRAMESIZE)) {
+ pkt_err(pd, "wrong bio size\n");
+ goto end_io;
+ }
+
+ do {
+ sector_t zone = get_zone(bio->bi_iter.bi_sector, pd);
+ sector_t last_zone = get_zone(bio_end_sector(bio) - 1, pd);
+
+ if (last_zone != zone) {
+ BUG_ON(last_zone != zone + pd->settings.size);
+
+ split = bio_split(bio, last_zone -
+ bio->bi_iter.bi_sector,
+ GFP_NOIO, &pkt_bio_set);
+ bio_chain(split, bio);
+ } else {
+ split = bio;
+ }
+
+ pkt_make_request_write(bio->bi_bdev->bd_disk->queue, split);
+ } while (split != bio);
+
+ return;
+end_io:
+ bio_io_error(bio);
+}
+
+static void pkt_init_queue(struct pktcdvd_device *pd)
+{
+ struct request_queue *q = pd->disk->queue;
+
+ blk_queue_logical_block_size(q, CD_FRAMESIZE);
+ blk_queue_max_hw_sectors(q, PACKET_MAX_SECTORS);
+ q->queuedata = pd;
+}
+
+static int pkt_seq_show(struct seq_file *m, void *p)
+{
+ struct pktcdvd_device *pd = m->private;
+ char *msg;
+ int states[PACKET_NUM_STATES];
+
+ seq_printf(m, "Writer %s mapped to %pg:\n", pd->name, pd->bdev);
+
+ seq_printf(m, "\nSettings:\n");
+ seq_printf(m, "\tpacket size:\t\t%dkB\n", pd->settings.size / 2);
+
+ if (pd->settings.write_type == 0)
+ msg = "Packet";
+ else
+ msg = "Unknown";
+ seq_printf(m, "\twrite type:\t\t%s\n", msg);
+
+ seq_printf(m, "\tpacket type:\t\t%s\n", pd->settings.fp ? "Fixed" : "Variable");
+ seq_printf(m, "\tlink loss:\t\t%d\n", pd->settings.link_loss);
+
+ seq_printf(m, "\ttrack mode:\t\t%d\n", pd->settings.track_mode);
+
+ if (pd->settings.block_mode == PACKET_BLOCK_MODE1)
+ msg = "Mode 1";
+ else if (pd->settings.block_mode == PACKET_BLOCK_MODE2)
+ msg = "Mode 2";
+ else
+ msg = "Unknown";
+ seq_printf(m, "\tblock mode:\t\t%s\n", msg);
+
+ seq_printf(m, "\nStatistics:\n");
+ seq_printf(m, "\tpackets started:\t%lu\n", pd->stats.pkt_started);
+ seq_printf(m, "\tpackets ended:\t\t%lu\n", pd->stats.pkt_ended);
+ seq_printf(m, "\twritten:\t\t%lukB\n", pd->stats.secs_w >> 1);
+ seq_printf(m, "\tread gather:\t\t%lukB\n", pd->stats.secs_rg >> 1);
+ seq_printf(m, "\tread:\t\t\t%lukB\n", pd->stats.secs_r >> 1);
+
+ seq_printf(m, "\nMisc:\n");
+ seq_printf(m, "\treference count:\t%d\n", pd->refcnt);
+ seq_printf(m, "\tflags:\t\t\t0x%lx\n", pd->flags);
+ seq_printf(m, "\tread speed:\t\t%ukB/s\n", pd->read_speed);
+ seq_printf(m, "\twrite speed:\t\t%ukB/s\n", pd->write_speed);
+ seq_printf(m, "\tstart offset:\t\t%lu\n", pd->offset);
+ seq_printf(m, "\tmode page offset:\t%u\n", pd->mode_offset);
+
+ seq_printf(m, "\nQueue state:\n");
+ seq_printf(m, "\tbios queued:\t\t%d\n", pd->bio_queue_size);
+ seq_printf(m, "\tbios pending:\t\t%d\n", atomic_read(&pd->cdrw.pending_bios));
+ seq_printf(m, "\tcurrent sector:\t\t0x%llx\n", (unsigned long long)pd->current_sector);
+
+ pkt_count_states(pd, states);
+ seq_printf(m, "\tstate:\t\t\ti:%d ow:%d rw:%d ww:%d rec:%d fin:%d\n",
+ states[0], states[1], states[2], states[3], states[4], states[5]);
+
+ seq_printf(m, "\twrite congestion marks:\toff=%d on=%d\n",
+ pd->write_congestion_off,
+ pd->write_congestion_on);
+ return 0;
+}
+
+static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
+{
+ int i;
+ struct block_device *bdev;
+ struct scsi_device *sdev;
+
+ if (pd->pkt_dev == dev) {
+ pkt_err(pd, "recursive setup not allowed\n");
+ return -EBUSY;
+ }
+ for (i = 0; i < MAX_WRITERS; i++) {
+ struct pktcdvd_device *pd2 = pkt_devs[i];
+ if (!pd2)
+ continue;
+ if (pd2->bdev->bd_dev == dev) {
+ pkt_err(pd, "%pg already setup\n", pd2->bdev);
+ return -EBUSY;
+ }
+ if (pd2->pkt_dev == dev) {
+ pkt_err(pd, "can't chain pktcdvd devices\n");
+ return -EBUSY;
+ }
+ }
+
+ bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_NDELAY, NULL);
+ if (IS_ERR(bdev))
+ return PTR_ERR(bdev);
+ sdev = scsi_device_from_queue(bdev->bd_disk->queue);
+ if (!sdev) {
+ blkdev_put(bdev, FMODE_READ | FMODE_NDELAY);
+ return -EINVAL;
+ }
+ put_device(&sdev->sdev_gendev);
+
+ /* This is safe, since we have a reference from open(). */
+ __module_get(THIS_MODULE);
+
+ pd->bdev = bdev;
+ set_blocksize(bdev, CD_FRAMESIZE);
+
+ pkt_init_queue(pd);
+
+ atomic_set(&pd->cdrw.pending_bios, 0);
+ pd->cdrw.thread = kthread_run(kcdrwd, pd, "%s", pd->name);
+ if (IS_ERR(pd->cdrw.thread)) {
+ pkt_err(pd, "can't start kernel thread\n");
+ goto out_mem;
+ }
+
+ proc_create_single_data(pd->name, 0, pkt_proc, pkt_seq_show, pd);
+ pkt_dbg(1, pd, "writer mapped to %pg\n", bdev);
+ return 0;
+
+out_mem:
+ blkdev_put(bdev, FMODE_READ | FMODE_NDELAY);
+ /* This is safe: open() is still holding a reference. */
+ module_put(THIS_MODULE);
+ return -ENOMEM;
+}
+
+static int pkt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg)
+{
+ struct pktcdvd_device *pd = bdev->bd_disk->private_data;
+ int ret;
+
+ pkt_dbg(2, pd, "cmd %x, dev %d:%d\n",
+ cmd, MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev));
+
+ mutex_lock(&pktcdvd_mutex);
+ switch (cmd) {
+ case CDROMEJECT:
+ /*
+ * The door gets locked when the device is opened, so we
+ * have to unlock it or else the eject command fails.
+ */
+ if (pd->refcnt == 1)
+ pkt_lock_door(pd, 0);
+ fallthrough;
+ /*
+ * forward selected CDROM ioctls to CD-ROM, for UDF
+ */
+ case CDROMMULTISESSION:
+ case CDROMREADTOCENTRY:
+ case CDROM_LAST_WRITTEN:
+ case CDROM_SEND_PACKET:
+ case SCSI_IOCTL_SEND_COMMAND:
+ if (!bdev->bd_disk->fops->ioctl)
+ ret = -ENOTTY;
+ else
+ ret = bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg);
+ break;
+ default:
+ pkt_dbg(2, pd, "Unknown ioctl (%x)\n", cmd);
+ ret = -ENOTTY;
+ }
+ mutex_unlock(&pktcdvd_mutex);
+
+ return ret;
+}
+
+static unsigned int pkt_check_events(struct gendisk *disk,
+ unsigned int clearing)
+{
+ struct pktcdvd_device *pd = disk->private_data;
+ struct gendisk *attached_disk;
+
+ if (!pd)
+ return 0;
+ if (!pd->bdev)
+ return 0;
+ attached_disk = pd->bdev->bd_disk;
+ if (!attached_disk || !attached_disk->fops->check_events)
+ return 0;
+ return attached_disk->fops->check_events(attached_disk, clearing);
+}
+
+static char *pkt_devnode(struct gendisk *disk, umode_t *mode)
+{
+ return kasprintf(GFP_KERNEL, "pktcdvd/%s", disk->disk_name);
+}
+
+static const struct block_device_operations pktcdvd_ops = {
+ .owner = THIS_MODULE,
+ .submit_bio = pkt_submit_bio,
+ .open = pkt_open,
+ .release = pkt_close,
+ .ioctl = pkt_ioctl,
+ .compat_ioctl = blkdev_compat_ptr_ioctl,
+ .check_events = pkt_check_events,
+ .devnode = pkt_devnode,
+};
+
+/*
+ * Set up mapping from pktcdvd device to CD-ROM device.
+ */
+static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
+{
+ int idx;
+ int ret = -ENOMEM;
+ struct pktcdvd_device *pd;
+ struct gendisk *disk;
+
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+ for (idx = 0; idx < MAX_WRITERS; idx++)
+ if (!pkt_devs[idx])
+ break;
+ if (idx == MAX_WRITERS) {
+ pr_err("max %d writers supported\n", MAX_WRITERS);
+ ret = -EBUSY;
+ goto out_mutex;
+ }
+
+ pd = kzalloc(sizeof(struct pktcdvd_device), GFP_KERNEL);
+ if (!pd)
+ goto out_mutex;
+
+ ret = mempool_init_kmalloc_pool(&pd->rb_pool, PKT_RB_POOL_SIZE,
+ sizeof(struct pkt_rb_node));
+ if (ret)
+ goto out_mem;
+
+ INIT_LIST_HEAD(&pd->cdrw.pkt_free_list);
+ INIT_LIST_HEAD(&pd->cdrw.pkt_active_list);
+ spin_lock_init(&pd->cdrw.active_list_lock);
+
+ spin_lock_init(&pd->lock);
+ spin_lock_init(&pd->iosched.lock);
+ bio_list_init(&pd->iosched.read_queue);
+ bio_list_init(&pd->iosched.write_queue);
+ sprintf(pd->name, DRIVER_NAME"%d", idx);
+ init_waitqueue_head(&pd->wqueue);
+ pd->bio_queue = RB_ROOT;
+
+ pd->write_congestion_on = write_congestion_on;
+ pd->write_congestion_off = write_congestion_off;
+
+ ret = -ENOMEM;
+ disk = blk_alloc_disk(NUMA_NO_NODE);
+ if (!disk)
+ goto out_mem;
+ pd->disk = disk;
+ disk->major = pktdev_major;
+ disk->first_minor = idx;
+ disk->minors = 1;
+ disk->fops = &pktcdvd_ops;
+ disk->flags = GENHD_FL_REMOVABLE | GENHD_FL_NO_PART;
+ strcpy(disk->disk_name, pd->name);
+ disk->private_data = pd;
+
+ pd->pkt_dev = MKDEV(pktdev_major, idx);
+ ret = pkt_new_dev(pd, dev);
+ if (ret)
+ goto out_mem2;
+
+ /* inherit events of the host device */
+ disk->events = pd->bdev->bd_disk->events;
+
+ ret = add_disk(disk);
+ if (ret)
+ goto out_mem2;
+
+ pkt_sysfs_dev_new(pd);
+ pkt_debugfs_dev_new(pd);
+
+ pkt_devs[idx] = pd;
+ if (pkt_dev)
+ *pkt_dev = pd->pkt_dev;
+
+ mutex_unlock(&ctl_mutex);
+ return 0;
+
+out_mem2:
+ put_disk(disk);
+out_mem:
+ mempool_exit(&pd->rb_pool);
+ kfree(pd);
+out_mutex:
+ mutex_unlock(&ctl_mutex);
+ pr_err("setup of pktcdvd device failed\n");
+ return ret;
+}
+
+/*
+ * Tear down mapping from pktcdvd device to CD-ROM device.
+ */
+static int pkt_remove_dev(dev_t pkt_dev)
+{
+ struct pktcdvd_device *pd;
+ int idx;
+ int ret = 0;
+
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+ for (idx = 0; idx < MAX_WRITERS; idx++) {
+ pd = pkt_devs[idx];
+ if (pd && (pd->pkt_dev == pkt_dev))
+ break;
+ }
+ if (idx == MAX_WRITERS) {
+ pr_debug("dev not setup\n");
+ ret = -ENXIO;
+ goto out;
+ }
+
+ if (pd->refcnt > 0) {
+ ret = -EBUSY;
+ goto out;
+ }
+ if (!IS_ERR(pd->cdrw.thread))
+ kthread_stop(pd->cdrw.thread);
+
+ pkt_devs[idx] = NULL;
+
+ pkt_debugfs_dev_remove(pd);
+ pkt_sysfs_dev_remove(pd);
+
+ blkdev_put(pd->bdev, FMODE_READ | FMODE_NDELAY);
+
+ remove_proc_entry(pd->name, pkt_proc);
+ pkt_dbg(1, pd, "writer unmapped\n");
+
+ del_gendisk(pd->disk);
+ put_disk(pd->disk);
+
+ mempool_exit(&pd->rb_pool);
+ kfree(pd);
+
+ /* This is safe: open() is still holding a reference. */
+ module_put(THIS_MODULE);
+
+out:
+ mutex_unlock(&ctl_mutex);
+ return ret;
+}
+
+static void pkt_get_status(struct pkt_ctrl_command *ctrl_cmd)
+{
+ struct pktcdvd_device *pd;
+
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+ pd = pkt_find_dev_from_minor(ctrl_cmd->dev_index);
+ if (pd) {
+ ctrl_cmd->dev = new_encode_dev(pd->bdev->bd_dev);
+ ctrl_cmd->pkt_dev = new_encode_dev(pd->pkt_dev);
+ } else {
+ ctrl_cmd->dev = 0;
+ ctrl_cmd->pkt_dev = 0;
+ }
+ ctrl_cmd->num_devices = MAX_WRITERS;
+
+ mutex_unlock(&ctl_mutex);
+}
+
+static long pkt_ctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ void __user *argp = (void __user *)arg;
+ struct pkt_ctrl_command ctrl_cmd;
+ int ret = 0;
+ dev_t pkt_dev = 0;
+
+ if (cmd != PACKET_CTRL_CMD)
+ return -ENOTTY;
+
+ if (copy_from_user(&ctrl_cmd, argp, sizeof(struct pkt_ctrl_command)))
+ return -EFAULT;
+
+ switch (ctrl_cmd.command) {
+ case PKT_CTRL_CMD_SETUP:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ ret = pkt_setup_dev(new_decode_dev(ctrl_cmd.dev), &pkt_dev);
+ ctrl_cmd.pkt_dev = new_encode_dev(pkt_dev);
+ break;
+ case PKT_CTRL_CMD_TEARDOWN:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ ret = pkt_remove_dev(new_decode_dev(ctrl_cmd.pkt_dev));
+ break;
+ case PKT_CTRL_CMD_STATUS:
+ pkt_get_status(&ctrl_cmd);
+ break;
+ default:
+ return -ENOTTY;
+ }
+
+ if (copy_to_user(argp, &ctrl_cmd, sizeof(struct pkt_ctrl_command)))
+ return -EFAULT;
+ return ret;
+}
+
+#ifdef CONFIG_COMPAT
+static long pkt_ctl_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ return pkt_ctl_ioctl(file, cmd, (unsigned long)compat_ptr(arg));
+}
+#endif
+
+static const struct file_operations pkt_ctl_fops = {
+ .open = nonseekable_open,
+ .unlocked_ioctl = pkt_ctl_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = pkt_ctl_compat_ioctl,
+#endif
+ .owner = THIS_MODULE,
+ .llseek = no_llseek,
+};
+
+static struct miscdevice pkt_misc = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = DRIVER_NAME,
+ .nodename = "pktcdvd/control",
+ .fops = &pkt_ctl_fops
+};
+
+static int __init pkt_init(void)
+{
+ int ret;
+
+ mutex_init(&ctl_mutex);
+
+ ret = mempool_init_kmalloc_pool(&psd_pool, PSD_POOL_SIZE,
+ sizeof(struct packet_stacked_data));
+ if (ret)
+ return ret;
+ ret = bioset_init(&pkt_bio_set, BIO_POOL_SIZE, 0, 0);
+ if (ret) {
+ mempool_exit(&psd_pool);
+ return ret;
+ }
+
+ ret = register_blkdev(pktdev_major, DRIVER_NAME);
+ if (ret < 0) {
+ pr_err("unable to register block device\n");
+ goto out2;
+ }
+ if (!pktdev_major)
+ pktdev_major = ret;
+
+ ret = pkt_sysfs_init();
+ if (ret)
+ goto out;
+
+ pkt_debugfs_init();
+
+ ret = misc_register(&pkt_misc);
+ if (ret) {
+ pr_err("unable to register misc device\n");
+ goto out_misc;
+ }
+
+ pkt_proc = proc_mkdir("driver/"DRIVER_NAME, NULL);
+
+ return 0;
+
+out_misc:
+ pkt_debugfs_cleanup();
+ pkt_sysfs_cleanup();
+out:
+ unregister_blkdev(pktdev_major, DRIVER_NAME);
+out2:
+ mempool_exit(&psd_pool);
+ bioset_exit(&pkt_bio_set);
+ return ret;
+}
+
+static void __exit pkt_exit(void)
+{
+ remove_proc_entry("driver/"DRIVER_NAME, NULL);
+ misc_deregister(&pkt_misc);
+
+ pkt_debugfs_cleanup();
+ pkt_sysfs_cleanup();
+
+ unregister_blkdev(pktdev_major, DRIVER_NAME);
+ mempool_exit(&psd_pool);
+ bioset_exit(&pkt_bio_set);
+}
+
+MODULE_DESCRIPTION("Packet writing layer for CD/DVD drives");
+MODULE_AUTHOR("Jens Axboe <axboe@suse.de>");
+MODULE_LICENSE("GPL");
+
+module_init(pkt_init);
+module_exit(pkt_exit);
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index c76e0148eada..574e470b220b 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -587,6 +587,8 @@ static void ps3vram_submit_bio(struct bio *bio)
dev_dbg(&dev->core, "%s\n", __func__);
bio = bio_split_to_limits(bio);
+ if (!bio)
+ return;
spin_lock_irq(&priv->lock);
busy = !bio_list_empty(&priv->list);
diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index 78334da74d8b..5eb8c7855970 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -1440,7 +1440,7 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess,
goto out_alloc;
}
- ret = ida_alloc_max(&index_ida, 1 << (MINORBITS - RNBD_PART_BITS),
+ ret = ida_alloc_max(&index_ida, (1 << (MINORBITS - RNBD_PART_BITS)) - 1,
GFP_KERNEL);
if (ret < 0) {
pr_err("Failed to initialize device '%s' from session %s, allocating idr failed, err: %d\n",
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index e9de9d846b73..6368b56eacf1 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -137,7 +137,7 @@ struct ublk_device {
char *__queues;
- unsigned short queue_size;
+ unsigned int queue_size;
struct ublksrv_ctrl_dev_info dev_info;
struct blk_mq_tag_set tag_set;
@@ -1992,6 +1992,9 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
int ret = -EINVAL;
+ if (issue_flags & IO_URING_F_NONBLOCK)
+ return -EAGAIN;
+
ublk_ctrl_cmd_dump(cmd);
if (!(issue_flags & IO_URING_F_SQE128))
@@ -2089,13 +2092,12 @@ static void __exit ublk_exit(void)
struct ublk_device *ub;
int id;
- class_destroy(ublk_chr_class);
-
- misc_deregister(&ublk_misc);
-
idr_for_each_entry(&ublk_index_idr, ub, id)
ublk_remove(ub);
+ class_destroy(ublk_chr_class);
+ misc_deregister(&ublk_misc);
+
idr_destroy(&ublk_index_idr);
unregister_chrdev_region(ublk_chr_devt, UBLK_MINORS);
}
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 68bd2f7961b3..6a77fa917428 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -315,22 +315,35 @@ static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx)
virtqueue_notify(vq->vq);
}
+static blk_status_t virtblk_fail_to_queue(struct request *req, int rc)
+{
+ virtblk_cleanup_cmd(req);
+ switch (rc) {
+ case -ENOSPC:
+ return BLK_STS_DEV_RESOURCE;
+ case -ENOMEM:
+ return BLK_STS_RESOURCE;
+ default:
+ return BLK_STS_IOERR;
+ }
+}
+
static blk_status_t virtblk_prep_rq(struct blk_mq_hw_ctx *hctx,
struct virtio_blk *vblk,
struct request *req,
struct virtblk_req *vbr)
{
blk_status_t status;
+ int num;
status = virtblk_setup_cmd(vblk->vdev, req, vbr);
if (unlikely(status))
return status;
- vbr->sg_table.nents = virtblk_map_data(hctx, req, vbr);
- if (unlikely(vbr->sg_table.nents < 0)) {
- virtblk_cleanup_cmd(req);
- return BLK_STS_RESOURCE;
- }
+ num = virtblk_map_data(hctx, req, vbr);
+ if (unlikely(num < 0))
+ return virtblk_fail_to_queue(req, -ENOMEM);
+ vbr->sg_table.nents = num;
blk_mq_start_request(req);
@@ -364,15 +377,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_mq_stop_hw_queue(hctx);
spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
virtblk_unmap_data(req, vbr);
- virtblk_cleanup_cmd(req);
- switch (err) {
- case -ENOSPC:
- return BLK_STS_DEV_RESOURCE;
- case -ENOMEM:
- return BLK_STS_RESOURCE;
- default:
- return BLK_STS_IOERR;
- }
+ return virtblk_fail_to_queue(req, err);
}
if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
@@ -991,7 +996,7 @@ static int virtblk_probe(struct virtio_device *vdev)
blk_queue_max_segments(q, sg_elems);
/* No real sector limit. */
- blk_queue_max_hw_sectors(q, -1U);
+ blk_queue_max_hw_sectors(q, UINT_MAX);
max_size = virtio_max_dma_size(vdev);
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index c0227dfa4688..4807af1d5805 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -524,7 +524,7 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
return 0;
}
-static int xen_blkbk_remove(struct xenbus_device *dev)
+static void xen_blkbk_remove(struct xenbus_device *dev)
{
struct backend_info *be = dev_get_drvdata(&dev->dev);
@@ -547,8 +547,6 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
/* Put the reference we set in xen_blkif_alloc(). */
xen_blkif_put(be->blkif);
}
-
- return 0;
}
int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index b28489290323..23ed258b57f0 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -2467,7 +2467,7 @@ static void blkback_changed(struct xenbus_device *dev,
}
}
-static int blkfront_remove(struct xenbus_device *xbdev)
+static void blkfront_remove(struct xenbus_device *xbdev)
{
struct blkfront_info *info = dev_get_drvdata(&xbdev->dev);
@@ -2488,7 +2488,6 @@ static int blkfront_remove(struct xenbus_device *xbdev)
}
kfree(info);
- return 0;
}
static int blkfront_is_ready(struct xenbus_device *dev)
diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index 6eddc23e49d9..bbe9cf1cae27 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -2164,10 +2164,17 @@ static void qca_serdev_shutdown(struct device *dev)
int timeout = msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS);
struct serdev_device *serdev = to_serdev_device(dev);
struct qca_serdev *qcadev = serdev_device_get_drvdata(serdev);
+ struct hci_uart *hu = &qcadev->serdev_hu;
+ struct hci_dev *hdev = hu->hdev;
+ struct qca_data *qca = hu->priv;
const u8 ibs_wake_cmd[] = { 0xFD };
const u8 edl_reset_soc_cmd[] = { 0x01, 0x00, 0xFC, 0x01, 0x05 };
if (qcadev->btsoc_type == QCA_QCA6390) {
+ if (test_bit(QCA_BT_OFF, &qca->flags) ||
+ !test_bit(HCI_RUNNING, &hdev->flags))
+ return;
+
serdev_device_write_flush(serdev);
ret = serdev_device_write_buf(serdev, ibs_wake_cmd,
sizeof(ibs_wake_cmd));
diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c
index 3aa91aed3bf7..226e87b85116 100644
--- a/drivers/bus/sunxi-rsb.c
+++ b/drivers/bus/sunxi-rsb.c
@@ -857,7 +857,13 @@ static int __init sunxi_rsb_init(void)
return ret;
}
- return platform_driver_register(&sunxi_rsb_driver);
+ ret = platform_driver_register(&sunxi_rsb_driver);
+ if (ret) {
+ bus_unregister(&sunxi_rsb_bus);
+ return ret;
+ }
+
+ return 0;
}
module_init(sunxi_rsb_init);
diff --git a/drivers/char/tpm/eventlog/acpi.c b/drivers/char/tpm/eventlog/acpi.c
index 0913d3eb8d51..40360e599bc3 100644
--- a/drivers/char/tpm/eventlog/acpi.c
+++ b/drivers/char/tpm/eventlog/acpi.c
@@ -14,6 +14,7 @@
* Access to the event log extended by the TCG BIOS of PC platform
*/
+#include <linux/device.h>
#include <linux/seq_file.h>
#include <linux/fs.h>
#include <linux/security.h>
@@ -135,7 +136,7 @@ int tpm_read_log_acpi(struct tpm_chip *chip)
}
/* malloc EventLog space */
- log->bios_event_log = kmalloc(len, GFP_KERNEL);
+ log->bios_event_log = devm_kmalloc(&chip->dev, len, GFP_KERNEL);
if (!log->bios_event_log)
return -ENOMEM;
@@ -160,7 +161,7 @@ int tpm_read_log_acpi(struct tpm_chip *chip)
return format;
err:
- kfree(log->bios_event_log);
+ devm_kfree(&chip->dev, log->bios_event_log);
log->bios_event_log = NULL;
return ret;
}
diff --git a/drivers/char/tpm/eventlog/efi.c b/drivers/char/tpm/eventlog/efi.c
index e6cb9d525e30..4e9d7c2bf32e 100644
--- a/drivers/char/tpm/eventlog/efi.c
+++ b/drivers/char/tpm/eventlog/efi.c
@@ -6,6 +6,7 @@
* Thiebaud Weksteen <tweek@google.com>
*/
+#include <linux/device.h>
#include <linux/efi.h>
#include <linux/tpm_eventlog.h>
@@ -55,7 +56,7 @@ int tpm_read_log_efi(struct tpm_chip *chip)
}
/* malloc EventLog space */
- log->bios_event_log = kmemdup(log_tbl->log, log_size, GFP_KERNEL);
+ log->bios_event_log = devm_kmemdup(&chip->dev, log_tbl->log, log_size, GFP_KERNEL);
if (!log->bios_event_log) {
ret = -ENOMEM;
goto out;
@@ -76,7 +77,7 @@ int tpm_read_log_efi(struct tpm_chip *chip)
MEMREMAP_WB);
if (!final_tbl) {
pr_err("Could not map UEFI TPM final log\n");
- kfree(log->bios_event_log);
+ devm_kfree(&chip->dev, log->bios_event_log);
ret = -ENOMEM;
goto out;
}
@@ -91,11 +92,11 @@ int tpm_read_log_efi(struct tpm_chip *chip)
* Allocate memory for the 'combined log' where we will append the
* 'final events log' to.
*/
- tmp = krealloc(log->bios_event_log,
- log_size + final_events_log_size,
- GFP_KERNEL);
+ tmp = devm_krealloc(&chip->dev, log->bios_event_log,
+ log_size + final_events_log_size,
+ GFP_KERNEL);
if (!tmp) {
- kfree(log->bios_event_log);
+ devm_kfree(&chip->dev, log->bios_event_log);
ret = -ENOMEM;
goto out;
}
diff --git a/drivers/char/tpm/eventlog/of.c b/drivers/char/tpm/eventlog/of.c
index a9ce66d09a75..930fe43d5daf 100644
--- a/drivers/char/tpm/eventlog/of.c
+++ b/drivers/char/tpm/eventlog/of.c
@@ -10,13 +10,44 @@
* Read the event log created by the firmware on PPC64
*/
+#include <linux/device.h>
#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_reserved_mem.h>
#include <linux/tpm_eventlog.h>
#include "../tpm.h"
#include "common.h"
+static int tpm_read_log_memory_region(struct tpm_chip *chip)
+{
+ struct device_node *node;
+ struct resource res;
+ int rc;
+
+ node = of_parse_phandle(chip->dev.parent->of_node, "memory-region", 0);
+ if (!node)
+ return -ENODEV;
+
+ rc = of_address_to_resource(node, 0, &res);
+ of_node_put(node);
+ if (rc)
+ return rc;
+
+ chip->log.bios_event_log = devm_memremap(&chip->dev, res.start, resource_size(&res),
+ MEMREMAP_WB);
+ if (IS_ERR(chip->log.bios_event_log))
+ return -ENOMEM;
+
+ chip->log.bios_event_log_end = chip->log.bios_event_log + resource_size(&res);
+
+ return chip->flags & TPM_CHIP_FLAG_TPM2 ? EFI_TCG2_EVENT_LOG_FORMAT_TCG_2 :
+ EFI_TCG2_EVENT_LOG_FORMAT_TCG_1_2;
+}
+
int tpm_read_log_of(struct tpm_chip *chip)
{
struct device_node *np;
@@ -38,7 +69,7 @@ int tpm_read_log_of(struct tpm_chip *chip)
sizep = of_get_property(np, "linux,sml-size", NULL);
basep = of_get_property(np, "linux,sml-base", NULL);
if (sizep == NULL && basep == NULL)
- return -ENODEV;
+ return tpm_read_log_memory_region(chip);
if (sizep == NULL || basep == NULL)
return -EIO;
@@ -65,7 +96,7 @@ int tpm_read_log_of(struct tpm_chip *chip)
return -EIO;
}
- log->bios_event_log = kmemdup(__va(base), size, GFP_KERNEL);
+ log->bios_event_log = devm_kmemdup(&chip->dev, __va(base), size, GFP_KERNEL);
if (!log->bios_event_log)
return -ENOMEM;
diff --git a/drivers/char/tpm/st33zp24/i2c.c b/drivers/char/tpm/st33zp24/i2c.c
index 8156bb2af78c..c4d0b744e3cc 100644
--- a/drivers/char/tpm/st33zp24/i2c.c
+++ b/drivers/char/tpm/st33zp24/i2c.c
@@ -101,8 +101,7 @@ static const struct st33zp24_phy_ops i2c_phy_ops = {
* @return: 0 in case of success.
* -1 in other case.
*/
-static int st33zp24_i2c_probe(struct i2c_client *client,
- const struct i2c_device_id *id)
+static int st33zp24_i2c_probe(struct i2c_client *client)
{
struct st33zp24_i2c_phy *phy;
@@ -161,7 +160,7 @@ static struct i2c_driver st33zp24_i2c_driver = {
.of_match_table = of_match_ptr(of_st33zp24_i2c_match),
.acpi_match_table = ACPI_PTR(st33zp24_i2c_acpi_match),
},
- .probe = st33zp24_i2c_probe,
+ .probe_new = st33zp24_i2c_probe,
.remove = st33zp24_i2c_remove,
.id_table = st33zp24_i2c_id
};
diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
index 741d8f3e8fb3..b99f55f2d4fd 100644
--- a/drivers/char/tpm/tpm-chip.c
+++ b/drivers/char/tpm/tpm-chip.c
@@ -267,7 +267,6 @@ static void tpm_dev_release(struct device *dev)
idr_remove(&dev_nums_idr, chip->dev_num);
mutex_unlock(&idr_lock);
- kfree(chip->log.bios_event_log);
kfree(chip->work_space.context_buf);
kfree(chip->work_space.session_buf);
kfree(chip->allocated_banks);
diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index d69905233aff..7e513b771832 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -412,7 +412,9 @@ int tpm_pm_suspend(struct device *dev)
}
suspended:
- return rc;
+ if (rc)
+ dev_err(dev, "Ignoring error %d while suspending\n", rc);
+ return 0;
}
EXPORT_SYMBOL_GPL(tpm_pm_suspend);
diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c
index 65d03867e114..93545be190a5 100644
--- a/drivers/char/tpm/tpm2-cmd.c
+++ b/drivers/char/tpm/tpm2-cmd.c
@@ -777,10 +777,12 @@ out:
int tpm2_find_cc(struct tpm_chip *chip, u32 cc)
{
+ u32 cc_mask;
int i;
+ cc_mask = 1 << TPM2_CC_ATTR_VENDOR | GENMASK(15, 0);
for (i = 0; i < chip->nr_commands; i++)
- if (cc == (chip->cc_attrs_tbl[i] & GENMASK(15, 0)))
+ if (cc == (chip->cc_attrs_tbl[i] & cc_mask))
return i;
return -1;
diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c
index 7e9da671a0e8..d43a0d7b97a8 100644
--- a/drivers/char/tpm/tpm_crb.c
+++ b/drivers/char/tpm/tpm_crb.c
@@ -98,6 +98,8 @@ struct crb_priv {
u8 __iomem *rsp;
u32 cmd_size;
u32 smc_func_id;
+ u32 __iomem *pluton_start_addr;
+ u32 __iomem *pluton_reply_addr;
};
struct tpm2_crb_smc {
@@ -108,6 +110,11 @@ struct tpm2_crb_smc {
u32 smc_func_id;
};
+struct tpm2_crb_pluton {
+ u64 start_addr;
+ u64 reply_addr;
+};
+
static bool crb_wait_for_reg_32(u32 __iomem *reg, u32 mask, u32 value,
unsigned long timeout)
{
@@ -127,6 +134,25 @@ static bool crb_wait_for_reg_32(u32 __iomem *reg, u32 mask, u32 value,
return ((ioread32(reg) & mask) == value);
}
+static int crb_try_pluton_doorbell(struct crb_priv *priv, bool wait_for_complete)
+{
+ if (priv->sm != ACPI_TPM2_COMMAND_BUFFER_WITH_PLUTON)
+ return 0;
+
+ if (!crb_wait_for_reg_32(priv->pluton_reply_addr, ~0, 1, TPM2_TIMEOUT_C))
+ return -ETIME;
+
+ iowrite32(1, priv->pluton_start_addr);
+ if (wait_for_complete == false)
+ return 0;
+
+ if (!crb_wait_for_reg_32(priv->pluton_start_addr,
+ 0xffffffff, 0, 200))
+ return -ETIME;
+
+ return 0;
+}
+
/**
* __crb_go_idle - request tpm crb device to go the idle state
*
@@ -145,6 +171,8 @@ static bool crb_wait_for_reg_32(u32 __iomem *reg, u32 mask, u32 value,
*/
static int __crb_go_idle(struct device *dev, struct crb_priv *priv)
{
+ int rc;
+
if ((priv->sm == ACPI_TPM2_START_METHOD) ||
(priv->sm == ACPI_TPM2_COMMAND_BUFFER_WITH_START_METHOD) ||
(priv->sm == ACPI_TPM2_COMMAND_BUFFER_WITH_ARM_SMC))
@@ -152,6 +180,10 @@ static int __crb_go_idle(struct device *dev, struct crb_priv *priv)
iowrite32(CRB_CTRL_REQ_GO_IDLE, &priv->regs_t->ctrl_req);
+ rc = crb_try_pluton_doorbell(priv, true);
+ if (rc)
+ return rc;
+
if (!crb_wait_for_reg_32(&priv->regs_t->ctrl_req,
CRB_CTRL_REQ_GO_IDLE/* mask */,
0, /* value */
@@ -188,12 +220,19 @@ static int crb_go_idle(struct tpm_chip *chip)
*/
static int __crb_cmd_ready(struct device *dev, struct crb_priv *priv)
{
+ int rc;
+
if ((priv->sm == ACPI_TPM2_START_METHOD) ||
(priv->sm == ACPI_TPM2_COMMAND_BUFFER_WITH_START_METHOD) ||
(priv->sm == ACPI_TPM2_COMMAND_BUFFER_WITH_ARM_SMC))
return 0;
iowrite32(CRB_CTRL_REQ_CMD_READY, &priv->regs_t->ctrl_req);
+
+ rc = crb_try_pluton_doorbell(priv, true);
+ if (rc)
+ return rc;
+
if (!crb_wait_for_reg_32(&priv->regs_t->ctrl_req,
CRB_CTRL_REQ_CMD_READY /* mask */,
0, /* value */
@@ -371,6 +410,10 @@ static int crb_send(struct tpm_chip *chip, u8 *buf, size_t len)
return -E2BIG;
}
+ /* Seems to be necessary for every command */
+ if (priv->sm == ACPI_TPM2_COMMAND_BUFFER_WITH_PLUTON)
+ __crb_cmd_ready(&chip->dev, priv);
+
memcpy_toio(priv->cmd, buf, len);
/* Make sure that cmd is populated before issuing start. */
@@ -394,7 +437,10 @@ static int crb_send(struct tpm_chip *chip, u8 *buf, size_t len)
rc = tpm_crb_smc_start(&chip->dev, priv->smc_func_id);
}
- return rc;
+ if (rc)
+ return rc;
+
+ return crb_try_pluton_doorbell(priv, false);
}
static void crb_cancel(struct tpm_chip *chip)
@@ -524,15 +570,18 @@ static int crb_map_io(struct acpi_device *device, struct crb_priv *priv,
return ret;
acpi_dev_free_resource_list(&acpi_resource_list);
- if (resource_type(iores_array) != IORESOURCE_MEM) {
- dev_err(dev, FW_BUG "TPM2 ACPI table does not define a memory resource\n");
- return -EINVAL;
- } else if (resource_type(iores_array + TPM_CRB_MAX_RESOURCES) ==
- IORESOURCE_MEM) {
- dev_warn(dev, "TPM2 ACPI table defines too many memory resources\n");
- memset(iores_array + TPM_CRB_MAX_RESOURCES,
- 0, sizeof(*iores_array));
- iores_array[TPM_CRB_MAX_RESOURCES].flags = 0;
+ /* Pluton doesn't appear to define ACPI memory regions */
+ if (priv->sm != ACPI_TPM2_COMMAND_BUFFER_WITH_PLUTON) {
+ if (resource_type(iores_array) != IORESOURCE_MEM) {
+ dev_err(dev, FW_BUG "TPM2 ACPI table does not define a memory resource\n");
+ return -EINVAL;
+ } else if (resource_type(iores_array + TPM_CRB_MAX_RESOURCES) ==
+ IORESOURCE_MEM) {
+ dev_warn(dev, "TPM2 ACPI table defines too many memory resources\n");
+ memset(iores_array + TPM_CRB_MAX_RESOURCES,
+ 0, sizeof(*iores_array));
+ iores_array[TPM_CRB_MAX_RESOURCES].flags = 0;
+ }
}
iores = NULL;
@@ -656,6 +705,22 @@ out_relinquish_locality:
return ret;
}
+static int crb_map_pluton(struct device *dev, struct crb_priv *priv,
+ struct acpi_table_tpm2 *buf, struct tpm2_crb_pluton *crb_pluton)
+{
+ priv->pluton_start_addr = crb_map_res(dev, NULL, NULL,
+ crb_pluton->start_addr, 4);
+ if (IS_ERR(priv->pluton_start_addr))
+ return PTR_ERR(priv->pluton_start_addr);
+
+ priv->pluton_reply_addr = crb_map_res(dev, NULL, NULL,
+ crb_pluton->reply_addr, 4);
+ if (IS_ERR(priv->pluton_reply_addr))
+ return PTR_ERR(priv->pluton_reply_addr);
+
+ return 0;
+}
+
static int crb_acpi_add(struct acpi_device *device)
{
struct acpi_table_tpm2 *buf;
@@ -663,6 +728,7 @@ static int crb_acpi_add(struct acpi_device *device)
struct tpm_chip *chip;
struct device *dev = &device->dev;
struct tpm2_crb_smc *crb_smc;
+ struct tpm2_crb_pluton *crb_pluton;
acpi_status status;
u32 sm;
int rc;
@@ -700,6 +766,20 @@ static int crb_acpi_add(struct acpi_device *device)
priv->smc_func_id = crb_smc->smc_func_id;
}
+ if (sm == ACPI_TPM2_COMMAND_BUFFER_WITH_PLUTON) {
+ if (buf->header.length < (sizeof(*buf) + sizeof(*crb_pluton))) {
+ dev_err(dev,
+ FW_BUG "TPM2 ACPI table has wrong size %u for start method type %d\n",
+ buf->header.length,
+ ACPI_TPM2_COMMAND_BUFFER_WITH_PLUTON);
+ return -EINVAL;
+ }
+ crb_pluton = ACPI_ADD_PTR(struct tpm2_crb_pluton, buf, sizeof(*buf));
+ rc = crb_map_pluton(dev, priv, buf, crb_pluton);
+ if (rc)
+ return rc;
+ }
+
priv->sm = sm;
priv->hid = acpi_device_hid(device);
diff --git a/drivers/char/tpm/tpm_i2c_atmel.c b/drivers/char/tpm/tpm_i2c_atmel.c
index 4be3677c1463..8f77154e0550 100644
--- a/drivers/char/tpm/tpm_i2c_atmel.c
+++ b/drivers/char/tpm/tpm_i2c_atmel.c
@@ -146,8 +146,7 @@ static const struct tpm_class_ops i2c_atmel = {
.req_canceled = i2c_atmel_req_canceled,
};
-static int i2c_atmel_probe(struct i2c_client *client,
- const struct i2c_device_id *id)
+static int i2c_atmel_probe(struct i2c_client *client)
{
struct tpm_chip *chip;
struct device *dev = &client->dev;
@@ -204,7 +203,7 @@ static SIMPLE_DEV_PM_OPS(i2c_atmel_pm_ops, tpm_pm_suspend, tpm_pm_resume);
static struct i2c_driver i2c_atmel_driver = {
.id_table = i2c_atmel_id,
- .probe = i2c_atmel_probe,
+ .probe_new = i2c_atmel_probe,
.remove = i2c_atmel_remove,
.driver = {
.name = I2C_DRIVER_NAME,
diff --git a/drivers/char/tpm/tpm_i2c_infineon.c b/drivers/char/tpm/tpm_i2c_infineon.c
index fd3c3661e646..7cdaff52a96d 100644
--- a/drivers/char/tpm/tpm_i2c_infineon.c
+++ b/drivers/char/tpm/tpm_i2c_infineon.c
@@ -681,8 +681,7 @@ MODULE_DEVICE_TABLE(of, tpm_tis_i2c_of_match);
static SIMPLE_DEV_PM_OPS(tpm_tis_i2c_ops, tpm_pm_suspend, tpm_pm_resume);
-static int tpm_tis_i2c_probe(struct i2c_client *client,
- const struct i2c_device_id *id)
+static int tpm_tis_i2c_probe(struct i2c_client *client)
{
int rc;
struct device *dev = &(client->dev);
@@ -717,7 +716,7 @@ static void tpm_tis_i2c_remove(struct i2c_client *client)
static struct i2c_driver tpm_tis_i2c_driver = {
.id_table = tpm_tis_i2c_table,
- .probe = tpm_tis_i2c_probe,
+ .probe_new = tpm_tis_i2c_probe,
.remove = tpm_tis_i2c_remove,
.driver = {
.name = "tpm_i2c_infineon",
diff --git a/drivers/char/tpm/tpm_i2c_nuvoton.c b/drivers/char/tpm/tpm_i2c_nuvoton.c
index 95c37350cc8e..a026e98add50 100644
--- a/drivers/char/tpm/tpm_i2c_nuvoton.c
+++ b/drivers/char/tpm/tpm_i2c_nuvoton.c
@@ -522,9 +522,9 @@ static int get_vid(struct i2c_client *client, u32 *res)
return 0;
}
-static int i2c_nuvoton_probe(struct i2c_client *client,
- const struct i2c_device_id *id)
+static int i2c_nuvoton_probe(struct i2c_client *client)
{
+ const struct i2c_device_id *id = i2c_client_get_device_id(client);
int rc;
struct tpm_chip *chip;
struct device *dev = &client->dev;
@@ -650,7 +650,7 @@ static SIMPLE_DEV_PM_OPS(i2c_nuvoton_pm_ops, tpm_pm_suspend, tpm_pm_resume);
static struct i2c_driver i2c_nuvoton_driver = {
.id_table = i2c_nuvoton_id,
- .probe = i2c_nuvoton_probe,
+ .probe_new = i2c_nuvoton_probe,
.remove = i2c_nuvoton_remove,
.driver = {
.name = "tpm_i2c_nuvoton",
diff --git a/drivers/char/tpm/tpm_tis_i2c.c b/drivers/char/tpm/tpm_tis_i2c.c
index f3a7251c8e38..c8c34adc14c0 100644
--- a/drivers/char/tpm/tpm_tis_i2c.c
+++ b/drivers/char/tpm/tpm_tis_i2c.c
@@ -312,8 +312,7 @@ static const struct tpm_tis_phy_ops tpm_i2c_phy_ops = {
.verify_crc = tpm_tis_i2c_verify_crc,
};
-static int tpm_tis_i2c_probe(struct i2c_client *dev,
- const struct i2c_device_id *id)
+static int tpm_tis_i2c_probe(struct i2c_client *dev)
{
struct tpm_tis_i2c_phy *phy;
const u8 crc_enable = 1;
@@ -380,7 +379,7 @@ static struct i2c_driver tpm_tis_i2c_driver = {
.pm = &tpm_tis_pm,
.of_match_table = of_match_ptr(of_tis_i2c_match),
},
- .probe = tpm_tis_i2c_probe,
+ .probe_new = tpm_tis_i2c_probe,
.remove = tpm_tis_i2c_remove,
.id_table = tpm_tis_i2c_id,
};
diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c
index 379291826261..80cca3b83b22 100644
--- a/drivers/char/tpm/xen-tpmfront.c
+++ b/drivers/char/tpm/xen-tpmfront.c
@@ -360,14 +360,13 @@ static int tpmfront_probe(struct xenbus_device *dev,
return tpm_chip_register(priv->chip);
}
-static int tpmfront_remove(struct xenbus_device *dev)
+static void tpmfront_remove(struct xenbus_device *dev)
{
struct tpm_chip *chip = dev_get_drvdata(&dev->dev);
struct tpm_private *priv = dev_get_drvdata(&chip->dev);
tpm_chip_unregister(chip);
ring_free(priv);
dev_set_drvdata(&chip->dev, NULL);
- return 0;
}
static int tpmfront_resume(struct xenbus_device *dev)
diff --git a/drivers/clk/ingenic/jz4760-cgu.c b/drivers/clk/ingenic/jz4760-cgu.c
index ecd395ac8a28..e407f00bd594 100644
--- a/drivers/clk/ingenic/jz4760-cgu.c
+++ b/drivers/clk/ingenic/jz4760-cgu.c
@@ -58,7 +58,7 @@ jz4760_cgu_calc_m_n_od(const struct ingenic_cgu_pll_info *pll_info,
unsigned long rate, unsigned long parent_rate,
unsigned int *pm, unsigned int *pn, unsigned int *pod)
{
- unsigned int m, n, od, m_max = (1 << pll_info->m_bits) - 2;
+ unsigned int m, n, od, m_max = (1 << pll_info->m_bits) - 1;
/* The frequency after the N divider must be between 1 and 50 MHz. */
n = parent_rate / (1 * MHZ);
@@ -66,19 +66,17 @@ jz4760_cgu_calc_m_n_od(const struct ingenic_cgu_pll_info *pll_info,
/* The N divider must be >= 2. */
n = clamp_val(n, 2, 1 << pll_info->n_bits);
- for (;; n >>= 1) {
- od = (unsigned int)-1;
+ rate /= MHZ;
+ parent_rate /= MHZ;
- do {
- m = (rate / MHZ) * (1 << ++od) * n / (parent_rate / MHZ);
- } while ((m > m_max || m & 1) && (od < 4));
-
- if (od < 4 && m >= 4 && m <= m_max)
- break;
+ for (m = m_max; m >= m_max && n >= 2; n--) {
+ m = rate * n / parent_rate;
+ od = m & 1;
+ m <<= od;
}
*pm = m;
- *pn = n;
+ *pn = n + 1;
*pod = 1 << od;
}
diff --git a/drivers/clk/microchip/clk-mpfs-ccc.c b/drivers/clk/microchip/clk-mpfs-ccc.c
index 32aae880a14f..0ddc73e07be4 100644
--- a/drivers/clk/microchip/clk-mpfs-ccc.c
+++ b/drivers/clk/microchip/clk-mpfs-ccc.c
@@ -164,12 +164,11 @@ static int mpfs_ccc_register_outputs(struct device *dev, struct mpfs_ccc_out_hw_
for (unsigned int i = 0; i < num_clks; i++) {
struct mpfs_ccc_out_hw_clock *out_hw = &out_hws[i];
- char *name = devm_kzalloc(dev, 23, GFP_KERNEL);
+ char *name = devm_kasprintf(dev, GFP_KERNEL, "%s_out%u", parent->name, i);
if (!name)
return -ENOMEM;
- snprintf(name, 23, "%s_out%u", parent->name, i);
out_hw->divider.hw.init = CLK_HW_INIT_HW(name, &parent->hw, &clk_divider_ops, 0);
out_hw->divider.reg = data->pll_base[i / MPFS_CCC_OUTPUTS_PER_PLL] +
out_hw->reg_offset;
@@ -201,14 +200,13 @@ static int mpfs_ccc_register_plls(struct device *dev, struct mpfs_ccc_pll_hw_clo
for (unsigned int i = 0; i < num_clks; i++) {
struct mpfs_ccc_pll_hw_clock *pll_hw = &pll_hws[i];
- char *name = devm_kzalloc(dev, 18, GFP_KERNEL);
- if (!name)
+ pll_hw->name = devm_kasprintf(dev, GFP_KERNEL, "ccc%s_pll%u",
+ strchrnul(dev->of_node->full_name, '@'), i);
+ if (!pll_hw->name)
return -ENOMEM;
pll_hw->base = data->pll_base[i];
- snprintf(name, 18, "ccc%s_pll%u", strchrnul(dev->of_node->full_name, '@'), i);
- pll_hw->name = (const char *)name;
pll_hw->hw.init = CLK_HW_INIT_PARENTS_DATA_FIXED_SIZE(pll_hw->name,
pll_hw->parents,
&mpfs_ccc_pll_ops, 0);
diff --git a/drivers/comedi/drivers/adv_pci1760.c b/drivers/comedi/drivers/adv_pci1760.c
index fcfc2e299110..27f3890f471d 100644
--- a/drivers/comedi/drivers/adv_pci1760.c
+++ b/drivers/comedi/drivers/adv_pci1760.c
@@ -58,7 +58,7 @@
#define PCI1760_CMD_CLR_IMB2 0x00 /* Clears IMB2 */
#define PCI1760_CMD_SET_DO 0x01 /* Set output state */
#define PCI1760_CMD_GET_DO 0x02 /* Read output status */
-#define PCI1760_CMD_GET_STATUS 0x03 /* Read current status */
+#define PCI1760_CMD_GET_STATUS 0x07 /* Read current status */
#define PCI1760_CMD_GET_FW_VER 0x0e /* Read firmware version */
#define PCI1760_CMD_GET_HW_VER 0x0f /* Read hardware version */
#define PCI1760_CMD_SET_PWM_HI(x) (0x10 + (x) * 2) /* Set "hi" period */
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 204e39006dda..c17bd845f5fc 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -307,6 +307,7 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
max_perf = min_perf;
amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true);
+ cpufreq_cpu_put(policy);
}
static int amd_get_min_freq(struct amd_cpudata *cpudata)
diff --git a/drivers/cpufreq/apple-soc-cpufreq.c b/drivers/cpufreq/apple-soc-cpufreq.c
index d1801281cdd9..c11d22fd84c3 100644
--- a/drivers/cpufreq/apple-soc-cpufreq.c
+++ b/drivers/cpufreq/apple-soc-cpufreq.c
@@ -280,6 +280,7 @@ static int apple_soc_cpufreq_init(struct cpufreq_policy *policy)
policy->cpuinfo.transition_latency = transition_latency;
policy->dvfs_possible_from_any_cpu = true;
policy->fast_switch_possible = true;
+ policy->suspend_freq = freq_table[0].frequency;
if (policy_has_boost_freq(policy)) {
ret = cpufreq_enable_boost_support();
@@ -321,7 +322,6 @@ static struct cpufreq_driver apple_soc_cpufreq_driver = {
.flags = CPUFREQ_HAVE_GOVERNOR_PER_POLICY |
CPUFREQ_NEED_INITIAL_FREQ_CHECK | CPUFREQ_IS_COOLING_DEV,
.verify = cpufreq_generic_frequency_table_verify,
- .attr = cpufreq_generic_attr,
.get = apple_soc_cpufreq_get_rate,
.init = apple_soc_cpufreq_init,
.exit = apple_soc_cpufreq_exit,
@@ -329,6 +329,7 @@ static struct cpufreq_driver apple_soc_cpufreq_driver = {
.fast_switch = apple_soc_cpufreq_fast_switch,
.register_em = cpufreq_register_em_with_opp,
.attr = apple_soc_cpufreq_hw_attr,
+ .suspend = cpufreq_generic_suspend,
};
static int __init apple_soc_cpufreq_module_init(void)
diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c
index c10fc33b29b1..b74289a95a17 100644
--- a/drivers/cpufreq/armada-37xx-cpufreq.c
+++ b/drivers/cpufreq/armada-37xx-cpufreq.c
@@ -445,7 +445,7 @@ static int __init armada37xx_cpufreq_driver_init(void)
return -ENODEV;
}
- clk = clk_get(cpu_dev, 0);
+ clk = clk_get(cpu_dev, NULL);
if (IS_ERR(clk)) {
dev_err(cpu_dev, "Cannot get clock for CPU0\n");
return PTR_ERR(clk);
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 432dfb4e8027..022e3555407c 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -487,7 +487,8 @@ static unsigned int get_perf_level_count(struct cpufreq_policy *policy)
cpu_data = policy->driver_data;
perf_caps = &cpu_data->perf_caps;
max_cap = arch_scale_cpu_capacity(cpu);
- min_cap = div_u64(max_cap * perf_caps->lowest_perf, perf_caps->highest_perf);
+ min_cap = div_u64((u64)max_cap * perf_caps->lowest_perf,
+ perf_caps->highest_perf);
if ((min_cap == 0) || (max_cap < min_cap))
return 0;
return 1 + max_cap / CPPC_EM_CAP_STEP - min_cap / CPPC_EM_CAP_STEP;
@@ -519,10 +520,10 @@ static int cppc_get_cpu_power(struct device *cpu_dev,
cpu_data = policy->driver_data;
perf_caps = &cpu_data->perf_caps;
max_cap = arch_scale_cpu_capacity(cpu_dev->id);
- min_cap = div_u64(max_cap * perf_caps->lowest_perf,
- perf_caps->highest_perf);
-
- perf_step = CPPC_EM_CAP_STEP * perf_caps->highest_perf / max_cap;
+ min_cap = div_u64((u64)max_cap * perf_caps->lowest_perf,
+ perf_caps->highest_perf);
+ perf_step = div_u64((u64)CPPC_EM_CAP_STEP * perf_caps->highest_perf,
+ max_cap);
min_step = min_cap / CPPC_EM_CAP_STEP;
max_step = max_cap / CPPC_EM_CAP_STEP;
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index 8ab672883043..e85703651098 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -137,6 +137,7 @@ static const struct of_device_id blocklist[] __initconst = {
{ .compatible = "nvidia,tegra30", },
{ .compatible = "nvidia,tegra124", },
{ .compatible = "nvidia,tegra210", },
+ { .compatible = "nvidia,tegra234", },
{ .compatible = "qcom,apq8096", },
{ .compatible = "qcom,msm8996", },
@@ -150,6 +151,7 @@ static const struct of_device_id blocklist[] __initconst = {
{ .compatible = "qcom,sdm845", },
{ .compatible = "qcom,sm6115", },
{ .compatible = "qcom,sm6350", },
+ { .compatible = "qcom,sm6375", },
{ .compatible = "qcom,sm8150", },
{ .compatible = "qcom,sm8250", },
{ .compatible = "qcom,sm8350", },
diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c
index 340fed35e45d..d3f55ca06ed3 100644
--- a/drivers/cpufreq/qcom-cpufreq-hw.c
+++ b/drivers/cpufreq/qcom-cpufreq-hw.c
@@ -143,40 +143,42 @@ static unsigned long qcom_lmh_get_throttle_freq(struct qcom_cpufreq_data *data)
return lval * xo_rate;
}
-/* Get the current frequency of the CPU (after throttling) */
-static unsigned int qcom_cpufreq_hw_get(unsigned int cpu)
+/* Get the frequency requested by the cpufreq core for the CPU */
+static unsigned int qcom_cpufreq_get_freq(unsigned int cpu)
{
struct qcom_cpufreq_data *data;
+ const struct qcom_cpufreq_soc_data *soc_data;
struct cpufreq_policy *policy;
+ unsigned int index;
policy = cpufreq_cpu_get_raw(cpu);
if (!policy)
return 0;
data = policy->driver_data;
+ soc_data = qcom_cpufreq.soc_data;
- return qcom_lmh_get_throttle_freq(data) / HZ_PER_KHZ;
+ index = readl_relaxed(data->base + soc_data->reg_perf_state);
+ index = min(index, LUT_MAX_ENTRIES - 1);
+
+ return policy->freq_table[index].frequency;
}
-/* Get the frequency requested by the cpufreq core for the CPU */
-static unsigned int qcom_cpufreq_get_freq(unsigned int cpu)
+static unsigned int qcom_cpufreq_hw_get(unsigned int cpu)
{
struct qcom_cpufreq_data *data;
- const struct qcom_cpufreq_soc_data *soc_data;
struct cpufreq_policy *policy;
- unsigned int index;
policy = cpufreq_cpu_get_raw(cpu);
if (!policy)
return 0;
data = policy->driver_data;
- soc_data = qcom_cpufreq.soc_data;
- index = readl_relaxed(data->base + soc_data->reg_perf_state);
- index = min(index, LUT_MAX_ENTRIES - 1);
+ if (data->throttle_irq >= 0)
+ return qcom_lmh_get_throttle_freq(data) / HZ_PER_KHZ;
- return policy->freq_table[index].frequency;
+ return qcom_cpufreq_get_freq(cpu);
}
static unsigned int qcom_cpufreq_hw_fast_switch(struct cpufreq_policy *policy,
@@ -649,9 +651,10 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev)
{
struct clk_hw_onecell_data *clk_data;
struct device *dev = &pdev->dev;
+ struct device_node *soc_node;
struct device *cpu_dev;
struct clk *clk;
- int ret, i, num_domains;
+ int ret, i, num_domains, reg_sz;
clk = clk_get(dev, "xo");
if (IS_ERR(clk))
@@ -679,7 +682,21 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev)
return ret;
/* Allocate qcom_cpufreq_data based on the available frequency domains in DT */
- num_domains = of_property_count_elems_of_size(dev->of_node, "reg", sizeof(u32) * 4);
+ soc_node = of_get_parent(dev->of_node);
+ if (!soc_node)
+ return -EINVAL;
+
+ ret = of_property_read_u32(soc_node, "#address-cells", &reg_sz);
+ if (ret)
+ goto of_exit;
+
+ ret = of_property_read_u32(soc_node, "#size-cells", &i);
+ if (ret)
+ goto of_exit;
+
+ reg_sz += i;
+
+ num_domains = of_property_count_elems_of_size(dev->of_node, "reg", sizeof(u32) * reg_sz);
if (num_domains <= 0)
return num_domains;
@@ -689,6 +706,8 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev)
return -ENOMEM;
qcom_cpufreq.soc_data = of_device_get_match_data(dev);
+ if (!qcom_cpufreq.soc_data)
+ return -ENODEV;
clk_data = devm_kzalloc(dev, struct_size(clk_data, hws, num_domains), GFP_KERNEL);
if (!clk_data)
@@ -743,6 +762,9 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev)
else
dev_dbg(dev, "QCOM CPUFreq HW driver initialized\n");
+of_exit:
+ of_node_put(soc_node);
+
return ret;
}
diff --git a/drivers/crypto/atmel-ecc.c b/drivers/crypto/atmel-ecc.c
index 53100fb9b07b..12205e2b53b4 100644
--- a/drivers/crypto/atmel-ecc.c
+++ b/drivers/crypto/atmel-ecc.c
@@ -3,7 +3,7 @@
* Microchip / Atmel ECC (I2C) driver.
*
* Copyright (c) 2017, Microchip Technology Inc.
- * Author: Tudor Ambarus <tudor.ambarus@microchip.com>
+ * Author: Tudor Ambarus
*/
#include <linux/delay.h>
@@ -411,6 +411,6 @@ static void __exit atmel_ecc_exit(void)
module_init(atmel_ecc_init);
module_exit(atmel_ecc_exit);
-MODULE_AUTHOR("Tudor Ambarus <tudor.ambarus@microchip.com>");
+MODULE_AUTHOR("Tudor Ambarus");
MODULE_DESCRIPTION("Microchip / Atmel ECC (I2C) driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/crypto/atmel-i2c.c b/drivers/crypto/atmel-i2c.c
index 81ce09bedda8..55bff1e13142 100644
--- a/drivers/crypto/atmel-i2c.c
+++ b/drivers/crypto/atmel-i2c.c
@@ -3,7 +3,7 @@
* Microchip / Atmel ECC (I2C) driver.
*
* Copyright (c) 2017, Microchip Technology Inc.
- * Author: Tudor Ambarus <tudor.ambarus@microchip.com>
+ * Author: Tudor Ambarus
*/
#include <linux/bitrev.h>
@@ -390,6 +390,6 @@ static void __exit atmel_i2c_exit(void)
module_init(atmel_i2c_init);
module_exit(atmel_i2c_exit);
-MODULE_AUTHOR("Tudor Ambarus <tudor.ambarus@microchip.com>");
+MODULE_AUTHOR("Tudor Ambarus");
MODULE_DESCRIPTION("Microchip / Atmel ECC (I2C) driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/crypto/atmel-i2c.h b/drivers/crypto/atmel-i2c.h
index 48929efe2a5b..35f7857a7f7c 100644
--- a/drivers/crypto/atmel-i2c.h
+++ b/drivers/crypto/atmel-i2c.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2017, Microchip Technology Inc.
- * Author: Tudor Ambarus <tudor.ambarus@microchip.com>
+ * Author: Tudor Ambarus
*/
#ifndef __ATMEL_I2C_H__
diff --git a/drivers/crypto/caam/blob_gen.c b/drivers/crypto/caam/blob_gen.c
index 1f65df489847..f46b161d2cda 100644
--- a/drivers/crypto/caam/blob_gen.c
+++ b/drivers/crypto/caam/blob_gen.c
@@ -104,7 +104,7 @@ int caam_process_blob(struct caam_blob_priv *priv,
}
ctrlpriv = dev_get_drvdata(jrdev->parent);
- moo = FIELD_GET(CSTA_MOO, ioread32(&ctrlpriv->ctrl->perfmon.status));
+ moo = FIELD_GET(CSTA_MOO, rd_reg32(&ctrlpriv->ctrl->perfmon.status));
if (moo != CSTA_MOO_SECURE && moo != CSTA_MOO_TRUSTED)
dev_warn(jrdev,
"using insecure test key, enable HAB to use unique device key!\n");
diff --git a/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c
index e553ccadbcbc..e5876286828b 100644
--- a/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c
+++ b/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c
@@ -239,7 +239,8 @@ static int virtio_crypto_alg_skcipher_close_session(
pr_err("virtio_crypto: Close session failed status: %u, session_id: 0x%llx\n",
ctrl_status->status, destroy_session->session_id);
- return -EINVAL;
+ err = -EINVAL;
+ goto out;
}
err = 0;
diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index ad0849af42d7..13cde44c6086 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -736,4 +736,3 @@ module_exit(cxl_acpi_exit);
MODULE_LICENSE("GPL v2");
MODULE_IMPORT_NS(CXL);
MODULE_IMPORT_NS(ACPI);
-MODULE_SOFTDEP("pre: cxl_pmem");
diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c
index f3d2169b6731..c2e4b1093788 100644
--- a/drivers/cxl/core/pmem.c
+++ b/drivers/cxl/core/pmem.c
@@ -227,34 +227,16 @@ static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_nvdimm_bridge *cxl_nvb,
return cxl_nvd;
}
-static void cxl_nvd_unregister(void *_cxl_nvd)
+static void cxlmd_release_nvdimm(void *_cxlmd)
{
- struct cxl_nvdimm *cxl_nvd = _cxl_nvd;
- struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
+ struct cxl_memdev *cxlmd = _cxlmd;
+ struct cxl_nvdimm *cxl_nvd = cxlmd->cxl_nvd;
struct cxl_nvdimm_bridge *cxl_nvb = cxlmd->cxl_nvb;
- /*
- * Either the bridge is in ->remove() context under the device_lock(),
- * or cxlmd_release_nvdimm() is cancelling the bridge's release action
- * for @cxl_nvd and doing it itself (while manually holding the bridge
- * lock).
- */
- device_lock_assert(&cxl_nvb->dev);
cxl_nvd->cxlmd = NULL;
cxlmd->cxl_nvd = NULL;
+ cxlmd->cxl_nvb = NULL;
device_unregister(&cxl_nvd->dev);
-}
-
-static void cxlmd_release_nvdimm(void *_cxlmd)
-{
- struct cxl_memdev *cxlmd = _cxlmd;
- struct cxl_nvdimm_bridge *cxl_nvb = cxlmd->cxl_nvb;
-
- device_lock(&cxl_nvb->dev);
- if (cxlmd->cxl_nvd)
- devm_release_action(&cxl_nvb->dev, cxl_nvd_unregister,
- cxlmd->cxl_nvd);
- device_unlock(&cxl_nvb->dev);
put_device(&cxl_nvb->dev);
}
@@ -293,22 +275,6 @@ int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd)
dev_dbg(&cxlmd->dev, "register %s\n", dev_name(dev));
- /*
- * The two actions below arrange for @cxl_nvd to be deleted when either
- * the top-level PMEM bridge goes down, or the endpoint device goes
- * through ->remove().
- */
- device_lock(&cxl_nvb->dev);
- if (cxl_nvb->dev.driver)
- rc = devm_add_action_or_reset(&cxl_nvb->dev, cxl_nvd_unregister,
- cxl_nvd);
- else
- rc = -ENXIO;
- device_unlock(&cxl_nvb->dev);
-
- if (rc)
- goto err_alloc;
-
/* @cxlmd carries a reference on @cxl_nvb until cxlmd_release_nvdimm */
return devm_add_action_or_reset(&cxlmd->dev, cxlmd_release_nvdimm, cxlmd);
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 02f28da519e3..940f805b1534 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -131,7 +131,7 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
struct cxl_port *iter = cxled_to_port(cxled);
struct cxl_ep *ep;
- int rc;
+ int rc = 0;
while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
iter = to_cxl_port(iter->dev.parent);
@@ -143,7 +143,8 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
cxl_rr = cxl_rr_load(iter, cxlr);
cxld = cxl_rr->decoder;
- rc = cxld->reset(cxld);
+ if (cxld->reset)
+ rc = cxld->reset(cxld);
if (rc)
return rc;
}
@@ -186,7 +187,8 @@ static int cxl_region_decode_commit(struct cxl_region *cxlr)
iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
cxl_rr = cxl_rr_load(iter, cxlr);
cxld = cxl_rr->decoder;
- cxld->reset(cxld);
+ if (cxld->reset)
+ cxld->reset(cxld);
}
cxled->cxld.reset(&cxled->cxld);
@@ -991,10 +993,10 @@ static int cxl_port_setup_targets(struct cxl_port *port,
int i, distance;
/*
- * Passthrough ports impose no distance requirements between
+ * Passthrough decoders impose no distance requirements between
* peers
*/
- if (port->nr_dports == 1)
+ if (cxl_rr->nr_targets == 1)
distance = 0;
else
distance = p->nr_targets / cxl_rr->nr_targets;
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 33083a522fd1..258004f34281 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -554,8 +554,11 @@ static bool cxl_report_and_clear(struct cxl_dev_state *cxlds)
/* If multiple errors, log header points to first error from ctrl reg */
if (hweight32(status) > 1) {
- addr = cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET;
- fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, readl(addr)));
+ void __iomem *rcc_addr =
+ cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET;
+
+ fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK,
+ readl(rcc_addr)));
} else {
fe = status;
}
diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c
index eedefebc4283..08bbbac9a6d0 100644
--- a/drivers/cxl/pmem.c
+++ b/drivers/cxl/pmem.c
@@ -225,11 +225,35 @@ static int cxl_pmem_ctl(struct nvdimm_bus_descriptor *nd_desc,
return cxl_pmem_nvdimm_ctl(nvdimm, cmd, buf, buf_len);
}
+static int detach_nvdimm(struct device *dev, void *data)
+{
+ struct cxl_nvdimm *cxl_nvd;
+ bool release = false;
+
+ if (!is_cxl_nvdimm(dev))
+ return 0;
+
+ device_lock(dev);
+ if (!dev->driver)
+ goto out;
+
+ cxl_nvd = to_cxl_nvdimm(dev);
+ if (cxl_nvd->cxlmd && cxl_nvd->cxlmd->cxl_nvb == data)
+ release = true;
+out:
+ device_unlock(dev);
+ if (release)
+ device_release_driver(dev);
+ return 0;
+}
+
static void unregister_nvdimm_bus(void *_cxl_nvb)
{
struct cxl_nvdimm_bridge *cxl_nvb = _cxl_nvb;
struct nvdimm_bus *nvdimm_bus = cxl_nvb->nvdimm_bus;
+ bus_for_each_dev(&cxl_bus_type, NULL, cxl_nvb, detach_nvdimm);
+
cxl_nvb->nvdimm_bus = NULL;
nvdimm_bus_unregister(nvdimm_bus);
}
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index da4438f3188c..c4c4728a36e4 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -475,7 +475,7 @@ EXPORT_SYMBOL_GPL(put_dax);
/**
* dax_holder() - obtain the holder of a dax device
* @dax_dev: a dax_device instance
-
+ *
* Return: the holder's data which represents the holder if registered,
* otherwize NULL.
*/
diff --git a/drivers/dma-buf/dma-buf-sysfs-stats.c b/drivers/dma-buf/dma-buf-sysfs-stats.c
index f69d68122b9b..fbf725fae7c1 100644
--- a/drivers/dma-buf/dma-buf-sysfs-stats.c
+++ b/drivers/dma-buf/dma-buf-sysfs-stats.c
@@ -168,14 +168,11 @@ void dma_buf_uninit_sysfs_statistics(void)
kset_unregister(dma_buf_stats_kset);
}
-int dma_buf_stats_setup(struct dma_buf *dmabuf)
+int dma_buf_stats_setup(struct dma_buf *dmabuf, struct file *file)
{
struct dma_buf_sysfs_entry *sysfs_entry;
int ret;
- if (!dmabuf || !dmabuf->file)
- return -EINVAL;
-
if (!dmabuf->exp_name) {
pr_err("exporter name must not be empty if stats needed\n");
return -EINVAL;
@@ -192,7 +189,7 @@ int dma_buf_stats_setup(struct dma_buf *dmabuf)
/* create the directory for buffer stats */
ret = kobject_init_and_add(&sysfs_entry->kobj, &dma_buf_ktype, NULL,
- "%lu", file_inode(dmabuf->file)->i_ino);
+ "%lu", file_inode(file)->i_ino);
if (ret)
goto err_sysfs_dmabuf;
diff --git a/drivers/dma-buf/dma-buf-sysfs-stats.h b/drivers/dma-buf/dma-buf-sysfs-stats.h
index a49c6e2650cc..7a8a995b75ba 100644
--- a/drivers/dma-buf/dma-buf-sysfs-stats.h
+++ b/drivers/dma-buf/dma-buf-sysfs-stats.h
@@ -13,7 +13,7 @@
int dma_buf_init_sysfs_statistics(void);
void dma_buf_uninit_sysfs_statistics(void);
-int dma_buf_stats_setup(struct dma_buf *dmabuf);
+int dma_buf_stats_setup(struct dma_buf *dmabuf, struct file *file);
void dma_buf_stats_teardown(struct dma_buf *dmabuf);
#else
@@ -25,7 +25,7 @@ static inline int dma_buf_init_sysfs_statistics(void)
static inline void dma_buf_uninit_sysfs_statistics(void) {}
-static inline int dma_buf_stats_setup(struct dma_buf *dmabuf)
+static inline int dma_buf_stats_setup(struct dma_buf *dmabuf, struct file *file)
{
return 0;
}
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index b6c36914e7c6..e6528767efc7 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -95,10 +95,11 @@ static int dma_buf_file_release(struct inode *inode, struct file *file)
return -EINVAL;
dmabuf = file->private_data;
-
- mutex_lock(&db_list.lock);
- list_del(&dmabuf->list_node);
- mutex_unlock(&db_list.lock);
+ if (dmabuf) {
+ mutex_lock(&db_list.lock);
+ list_del(&dmabuf->list_node);
+ mutex_unlock(&db_list.lock);
+ }
return 0;
}
@@ -528,17 +529,17 @@ static inline int is_dma_buf_file(struct file *file)
return file->f_op == &dma_buf_fops;
}
-static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags)
+static struct file *dma_buf_getfile(size_t size, int flags)
{
static atomic64_t dmabuf_inode = ATOMIC64_INIT(0);
- struct file *file;
struct inode *inode = alloc_anon_inode(dma_buf_mnt->mnt_sb);
+ struct file *file;
if (IS_ERR(inode))
return ERR_CAST(inode);
- inode->i_size = dmabuf->size;
- inode_set_bytes(inode, dmabuf->size);
+ inode->i_size = size;
+ inode_set_bytes(inode, size);
/*
* The ->i_ino acquired from get_next_ino() is not unique thus
@@ -552,8 +553,6 @@ static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags)
flags, &dma_buf_fops);
if (IS_ERR(file))
goto err_alloc_file;
- file->private_data = dmabuf;
- file->f_path.dentry->d_fsdata = dmabuf;
return file;
@@ -619,19 +618,11 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
size_t alloc_size = sizeof(struct dma_buf);
int ret;
- if (!exp_info->resv)
- alloc_size += sizeof(struct dma_resv);
- else
- /* prevent &dma_buf[1] == dma_buf->resv */
- alloc_size += 1;
-
- if (WARN_ON(!exp_info->priv
- || !exp_info->ops
- || !exp_info->ops->map_dma_buf
- || !exp_info->ops->unmap_dma_buf
- || !exp_info->ops->release)) {
+ if (WARN_ON(!exp_info->priv || !exp_info->ops
+ || !exp_info->ops->map_dma_buf
+ || !exp_info->ops->unmap_dma_buf
+ || !exp_info->ops->release))
return ERR_PTR(-EINVAL);
- }
if (WARN_ON(exp_info->ops->cache_sgt_mapping &&
(exp_info->ops->pin || exp_info->ops->unpin)))
@@ -643,10 +634,21 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
if (!try_module_get(exp_info->owner))
return ERR_PTR(-ENOENT);
+ file = dma_buf_getfile(exp_info->size, exp_info->flags);
+ if (IS_ERR(file)) {
+ ret = PTR_ERR(file);
+ goto err_module;
+ }
+
+ if (!exp_info->resv)
+ alloc_size += sizeof(struct dma_resv);
+ else
+ /* prevent &dma_buf[1] == dma_buf->resv */
+ alloc_size += 1;
dmabuf = kzalloc(alloc_size, GFP_KERNEL);
if (!dmabuf) {
ret = -ENOMEM;
- goto err_module;
+ goto err_file;
}
dmabuf->priv = exp_info->priv;
@@ -658,43 +660,35 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
init_waitqueue_head(&dmabuf->poll);
dmabuf->cb_in.poll = dmabuf->cb_out.poll = &dmabuf->poll;
dmabuf->cb_in.active = dmabuf->cb_out.active = 0;
+ INIT_LIST_HEAD(&dmabuf->attachments);
if (!resv) {
- resv = (struct dma_resv *)&dmabuf[1];
- dma_resv_init(resv);
+ dmabuf->resv = (struct dma_resv *)&dmabuf[1];
+ dma_resv_init(dmabuf->resv);
+ } else {
+ dmabuf->resv = resv;
}
- dmabuf->resv = resv;
- file = dma_buf_getfile(dmabuf, exp_info->flags);
- if (IS_ERR(file)) {
- ret = PTR_ERR(file);
+ ret = dma_buf_stats_setup(dmabuf, file);
+ if (ret)
goto err_dmabuf;
- }
+ file->private_data = dmabuf;
+ file->f_path.dentry->d_fsdata = dmabuf;
dmabuf->file = file;
- INIT_LIST_HEAD(&dmabuf->attachments);
-
mutex_lock(&db_list.lock);
list_add(&dmabuf->list_node, &db_list.head);
mutex_unlock(&db_list.lock);
- ret = dma_buf_stats_setup(dmabuf);
- if (ret)
- goto err_sysfs;
-
return dmabuf;
-err_sysfs:
- /*
- * Set file->f_path.dentry->d_fsdata to NULL so that when
- * dma_buf_release() gets invoked by dentry_ops, it exits
- * early before calling the release() dma_buf op.
- */
- file->f_path.dentry->d_fsdata = NULL;
- fput(file);
err_dmabuf:
+ if (!resv)
+ dma_resv_fini(dmabuf->resv);
kfree(dmabuf);
+err_file:
+ fput(file);
err_module:
module_put(exp_info->owner);
return ERR_PTR(ret);
diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 406b4e26f538..0de0482cd36e 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -167,7 +167,7 @@ struct dma_fence *dma_fence_allocate_private_stub(void)
0, 0);
set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
- &dma_fence_stub.flags);
+ &fence->flags);
dma_fence_signal(fence);
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index c741b6431958..8a6e6b60d66f 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -451,7 +451,8 @@ static int dma_chan_get(struct dma_chan *chan)
/* The channel is already in use, update client count */
if (chan->client_count) {
__module_get(owner);
- goto out;
+ chan->client_count++;
+ return 0;
}
if (!try_module_get(owner))
@@ -470,11 +471,11 @@ static int dma_chan_get(struct dma_chan *chan)
goto err_out;
}
+ chan->client_count++;
+
if (!dma_has_cap(DMA_PRIVATE, chan->device->cap_mask))
balance_ref_count(chan);
-out:
- chan->client_count++;
return 0;
err_out:
diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c
index a183d93bd7e2..bf85aa0979ec 100644
--- a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c
+++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c
@@ -1018,6 +1018,11 @@ static noinline void axi_chan_handle_err(struct axi_dma_chan *chan, u32 status)
/* The bad descriptor currently is in the head of vc list */
vd = vchan_next_desc(&chan->vc);
+ if (!vd) {
+ dev_err(chan2dev(chan), "BUG: %s, IRQ with no descriptors\n",
+ axi_chan_name(chan));
+ goto out;
+ }
/* Remove the completed descriptor from issued list */
list_del(&vd->node);
@@ -1032,6 +1037,7 @@ static noinline void axi_chan_handle_err(struct axi_dma_chan *chan, u32 status)
/* Try to restart the controller */
axi_chan_start_first_queued(chan);
+out:
spin_unlock_irqrestore(&chan->vc.lock, flags);
}
diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
index 06f5d3783d77..29dbb0f52e18 100644
--- a/drivers/dma/idxd/device.c
+++ b/drivers/dma/idxd/device.c
@@ -1172,8 +1172,19 @@ static void idxd_flush_pending_descs(struct idxd_irq_entry *ie)
spin_unlock(&ie->list_lock);
list_for_each_entry_safe(desc, itr, &flist, list) {
+ struct dma_async_tx_descriptor *tx;
+
list_del(&desc->list);
ctype = desc->completion->status ? IDXD_COMPLETE_NORMAL : IDXD_COMPLETE_ABORT;
+ /*
+ * wq is being disabled. Any remaining descriptors are
+ * likely to be stuck and can be dropped. callback could
+ * point to code that is no longer accessible, for example
+ * if dmatest module has been unloaded.
+ */
+ tx = &desc->txd;
+ tx->callback = NULL;
+ tx->callback_result = NULL;
idxd_dma_complete_txd(desc, ctype, true);
}
}
@@ -1390,8 +1401,7 @@ err_res_alloc:
err_irq:
idxd_wq_unmap_portal(wq);
err_map_portal:
- rc = idxd_wq_disable(wq, false);
- if (rc < 0)
+ if (idxd_wq_disable(wq, false))
dev_dbg(dev, "wq %s disable failed\n", dev_name(wq_confdev(wq)));
err:
return rc;
@@ -1408,11 +1418,11 @@ void drv_disable_wq(struct idxd_wq *wq)
dev_warn(dev, "Clients has claim on wq %d: %d\n",
wq->id, idxd_wq_refcount(wq));
- idxd_wq_free_resources(wq);
idxd_wq_unmap_portal(wq);
idxd_wq_drain(wq);
idxd_wq_free_irq(wq);
idxd_wq_reset(wq);
+ idxd_wq_free_resources(wq);
percpu_ref_exit(&wq->wq_active);
wq->type = IDXD_WQT_NONE;
wq->client_count = 0;
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index fbea5f62dd98..b926abe4fa43 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -1521,10 +1521,12 @@ static struct sdma_desc *sdma_transfer_init(struct sdma_channel *sdmac,
sdma_config_ownership(sdmac, false, true, false);
if (sdma_load_context(sdmac))
- goto err_desc_out;
+ goto err_bd_out;
return desc;
+err_bd_out:
+ sdma_free_bd(desc);
err_desc_out:
kfree(desc);
err_out:
diff --git a/drivers/dma/lgm/lgm-dma.c b/drivers/dma/lgm/lgm-dma.c
index 9b9184f964be..1709d159af7e 100644
--- a/drivers/dma/lgm/lgm-dma.c
+++ b/drivers/dma/lgm/lgm-dma.c
@@ -914,7 +914,7 @@ static void ldma_dev_init(struct ldma_dev *d)
}
}
-static int ldma_cfg_init(struct ldma_dev *d)
+static int ldma_parse_dt(struct ldma_dev *d)
{
struct fwnode_handle *fwnode = dev_fwnode(d->dev);
struct ldma_port *p;
@@ -1661,10 +1661,6 @@ static int intel_ldma_probe(struct platform_device *pdev)
p->ldev = d;
}
- ret = ldma_cfg_init(d);
- if (ret)
- return ret;
-
dma_dev->dev = &pdev->dev;
ch_mask = (unsigned long)d->channels_mask;
@@ -1675,6 +1671,10 @@ static int intel_ldma_probe(struct platform_device *pdev)
ldma_dma_init_v3X(j, d);
}
+ ret = ldma_parse_dt(d);
+ if (ret)
+ return ret;
+
dma_dev->device_alloc_chan_resources = ldma_alloc_chan_resources;
dma_dev->device_free_chan_resources = ldma_free_chan_resources;
dma_dev->device_terminate_all = ldma_terminate_all;
diff --git a/drivers/dma/ptdma/ptdma-dev.c b/drivers/dma/ptdma/ptdma-dev.c
index 377da23012ac..a2bf13ff18b6 100644
--- a/drivers/dma/ptdma/ptdma-dev.c
+++ b/drivers/dma/ptdma/ptdma-dev.c
@@ -71,12 +71,13 @@ static int pt_core_execute_cmd(struct ptdma_desc *desc, struct pt_cmd_queue *cmd
bool soc = FIELD_GET(DWORD0_SOC, desc->dw0);
u8 *q_desc = (u8 *)&cmd_q->qbase[cmd_q->qidx];
u32 tail;
+ unsigned long flags;
if (soc) {
desc->dw0 |= FIELD_PREP(DWORD0_IOC, desc->dw0);
desc->dw0 &= ~DWORD0_SOC;
}
- mutex_lock(&cmd_q->q_mutex);
+ spin_lock_irqsave(&cmd_q->q_lock, flags);
/* Copy 32-byte command descriptor to hw queue. */
memcpy(q_desc, desc, 32);
@@ -91,7 +92,7 @@ static int pt_core_execute_cmd(struct ptdma_desc *desc, struct pt_cmd_queue *cmd
/* Turn the queue back on using our cached control register */
pt_start_queue(cmd_q);
- mutex_unlock(&cmd_q->q_mutex);
+ spin_unlock_irqrestore(&cmd_q->q_lock, flags);
return 0;
}
@@ -199,7 +200,7 @@ int pt_core_init(struct pt_device *pt)
cmd_q->pt = pt;
cmd_q->dma_pool = dma_pool;
- mutex_init(&cmd_q->q_mutex);
+ spin_lock_init(&cmd_q->q_lock);
/* Page alignment satisfies our needs for N <= 128 */
cmd_q->qsize = Q_SIZE(Q_DESC_SIZE);
diff --git a/drivers/dma/ptdma/ptdma.h b/drivers/dma/ptdma/ptdma.h
index d093c43b7d13..21b4bf895200 100644
--- a/drivers/dma/ptdma/ptdma.h
+++ b/drivers/dma/ptdma/ptdma.h
@@ -196,7 +196,7 @@ struct pt_cmd_queue {
struct ptdma_desc *qbase;
/* Aligned queue start address (per requirement) */
- struct mutex q_mutex ____cacheline_aligned;
+ spinlock_t q_lock ____cacheline_aligned;
unsigned int qidx;
unsigned int qsize;
diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c
index 061add832295..59a36cbf9b5f 100644
--- a/drivers/dma/qcom/gpi.c
+++ b/drivers/dma/qcom/gpi.c
@@ -1756,6 +1756,7 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc,
tre->dword[3] = u32_encode_bits(TRE_TYPE_GO, TRE_FLAGS_TYPE);
if (spi->cmd == SPI_RX) {
tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOB);
+ tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_LINK);
} else if (spi->cmd == SPI_TX) {
tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_CHAIN);
} else { /* SPI_DUPLEX */
diff --git a/drivers/dma/tegra186-gpc-dma.c b/drivers/dma/tegra186-gpc-dma.c
index 1d1180db6d4e..8f67f453a492 100644
--- a/drivers/dma/tegra186-gpc-dma.c
+++ b/drivers/dma/tegra186-gpc-dma.c
@@ -711,6 +711,7 @@ static int tegra_dma_terminate_all(struct dma_chan *dc)
return err;
}
+ vchan_terminate_vdesc(&tdc->dma_desc->vd);
tegra_dma_disable(tdc);
tdc->dma_desc = NULL;
}
diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c
index ae39b52012b2..79da93cc77b6 100644
--- a/drivers/dma/tegra210-adma.c
+++ b/drivers/dma/tegra210-adma.c
@@ -221,7 +221,7 @@ static int tegra_adma_init(struct tegra_adma *tdma)
int ret;
/* Clear any interrupts */
- tdma_write(tdma, tdma->cdata->global_int_clear, 0x1);
+ tdma_write(tdma, tdma->cdata->ch_base_offset + tdma->cdata->global_int_clear, 0x1);
/* Assert soft reset */
tdma_write(tdma, ADMA_GLOBAL_SOFT_RESET, 0x1);
diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c
index ce8b80bb34d7..4c62274e0b33 100644
--- a/drivers/dma/ti/k3-udma.c
+++ b/drivers/dma/ti/k3-udma.c
@@ -762,11 +762,12 @@ static void udma_decrement_byte_counters(struct udma_chan *uc, u32 val)
if (uc->desc->dir == DMA_DEV_TO_MEM) {
udma_rchanrt_write(uc, UDMA_CHAN_RT_BCNT_REG, val);
udma_rchanrt_write(uc, UDMA_CHAN_RT_SBCNT_REG, val);
- udma_rchanrt_write(uc, UDMA_CHAN_RT_PEER_BCNT_REG, val);
+ if (uc->config.ep_type != PSIL_EP_NATIVE)
+ udma_rchanrt_write(uc, UDMA_CHAN_RT_PEER_BCNT_REG, val);
} else {
udma_tchanrt_write(uc, UDMA_CHAN_RT_BCNT_REG, val);
udma_tchanrt_write(uc, UDMA_CHAN_RT_SBCNT_REG, val);
- if (!uc->bchan)
+ if (!uc->bchan && uc->config.ep_type != PSIL_EP_NATIVE)
udma_tchanrt_write(uc, UDMA_CHAN_RT_PEER_BCNT_REG, val);
}
}
diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
index a8d23cdf883e..ac09f0e5f58d 100644
--- a/drivers/dma/xilinx/xilinx_dma.c
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -3143,8 +3143,10 @@ static int xilinx_dma_probe(struct platform_device *pdev)
/* Initialize the channels */
for_each_child_of_node(node, child) {
err = xilinx_dma_child_probe(xdev, child);
- if (err < 0)
+ if (err < 0) {
+ of_node_put(child);
goto error;
+ }
}
if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) {
diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c
index 19522c568aa5..0689e1510721 100644
--- a/drivers/edac/edac_device.c
+++ b/drivers/edac/edac_device.c
@@ -34,6 +34,9 @@
static DEFINE_MUTEX(device_ctls_mutex);
static LIST_HEAD(edac_device_list);
+/* Default workqueue processing interval on this instance, in msecs */
+#define DEFAULT_POLL_INTERVAL 1000
+
#ifdef CONFIG_EDAC_DEBUG
static void edac_device_dump_device(struct edac_device_ctl_info *edac_dev)
{
@@ -336,7 +339,7 @@ static void edac_device_workq_function(struct work_struct *work_req)
* whole one second to save timers firing all over the period
* between integral seconds
*/
- if (edac_dev->poll_msec == 1000)
+ if (edac_dev->poll_msec == DEFAULT_POLL_INTERVAL)
edac_queue_work(&edac_dev->work, round_jiffies_relative(edac_dev->delay));
else
edac_queue_work(&edac_dev->work, edac_dev->delay);
@@ -366,7 +369,7 @@ static void edac_device_workq_setup(struct edac_device_ctl_info *edac_dev,
* timers firing on sub-second basis, while they are happy
* to fire together on the 1 second exactly
*/
- if (edac_dev->poll_msec == 1000)
+ if (edac_dev->poll_msec == DEFAULT_POLL_INTERVAL)
edac_queue_work(&edac_dev->work, round_jiffies_relative(edac_dev->delay));
else
edac_queue_work(&edac_dev->work, edac_dev->delay);
@@ -394,17 +397,16 @@ static void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev)
* Then restart the workq on the new delay
*/
void edac_device_reset_delay_period(struct edac_device_ctl_info *edac_dev,
- unsigned long value)
+ unsigned long msec)
{
- unsigned long jiffs = msecs_to_jiffies(value);
-
- if (value == 1000)
- jiffs = round_jiffies_relative(value);
-
- edac_dev->poll_msec = value;
- edac_dev->delay = jiffs;
+ edac_dev->poll_msec = msec;
+ edac_dev->delay = msecs_to_jiffies(msec);
- edac_mod_work(&edac_dev->work, jiffs);
+ /* See comment in edac_device_workq_setup() above */
+ if (edac_dev->poll_msec == DEFAULT_POLL_INTERVAL)
+ edac_mod_work(&edac_dev->work, round_jiffies_relative(edac_dev->delay));
+ else
+ edac_mod_work(&edac_dev->work, edac_dev->delay);
}
int edac_device_alloc_index(void)
@@ -443,11 +445,7 @@ int edac_device_add_device(struct edac_device_ctl_info *edac_dev)
/* This instance is NOW RUNNING */
edac_dev->op_state = OP_RUNNING_POLL;
- /*
- * enable workq processing on this instance,
- * default = 1000 msec
- */
- edac_device_workq_setup(edac_dev, 1000);
+ edac_device_workq_setup(edac_dev, edac_dev->poll_msec ?: DEFAULT_POLL_INTERVAL);
} else {
edac_dev->op_state = OP_RUNNING_INTERRUPT;
}
diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h
index 763c076d96f2..47593afdc234 100644
--- a/drivers/edac/edac_module.h
+++ b/drivers/edac/edac_module.h
@@ -53,7 +53,7 @@ bool edac_stop_work(struct delayed_work *work);
bool edac_mod_work(struct delayed_work *work, unsigned long delay);
extern void edac_device_reset_delay_period(struct edac_device_ctl_info
- *edac_dev, unsigned long value);
+ *edac_dev, unsigned long msec);
extern void edac_mc_reset_delay_period(unsigned long value);
/*
diff --git a/drivers/edac/highbank_mc_edac.c b/drivers/edac/highbank_mc_edac.c
index 61b76ec226af..19fba258ae10 100644
--- a/drivers/edac/highbank_mc_edac.c
+++ b/drivers/edac/highbank_mc_edac.c
@@ -174,8 +174,10 @@ static int highbank_mc_probe(struct platform_device *pdev)
drvdata = mci->pvt_info;
platform_set_drvdata(pdev, mci);
- if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL))
- return -ENOMEM;
+ if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) {
+ res = -ENOMEM;
+ goto free;
+ }
r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!r) {
@@ -243,6 +245,7 @@ err2:
edac_mc_del_mc(&pdev->dev);
err:
devres_release_group(&pdev->dev, NULL);
+free:
edac_mc_free(mci);
return res;
}
diff --git a/drivers/edac/qcom_edac.c b/drivers/edac/qcom_edac.c
index 97a27e42dd61..c45519f59dc1 100644
--- a/drivers/edac/qcom_edac.c
+++ b/drivers/edac/qcom_edac.c
@@ -252,7 +252,7 @@ clear:
static int
dump_syn_reg(struct edac_device_ctl_info *edev_ctl, int err_type, u32 bank)
{
- struct llcc_drv_data *drv = edev_ctl->pvt_info;
+ struct llcc_drv_data *drv = edev_ctl->dev->platform_data;
int ret;
ret = dump_syn_reg_values(drv, bank, err_type);
@@ -289,7 +289,7 @@ static irqreturn_t
llcc_ecc_irq_handler(int irq, void *edev_ctl)
{
struct edac_device_ctl_info *edac_dev_ctl = edev_ctl;
- struct llcc_drv_data *drv = edac_dev_ctl->pvt_info;
+ struct llcc_drv_data *drv = edac_dev_ctl->dev->platform_data;
irqreturn_t irq_rc = IRQ_NONE;
u32 drp_error, trp_error, i;
int ret;
@@ -358,7 +358,6 @@ static int qcom_llcc_edac_probe(struct platform_device *pdev)
edev_ctl->dev_name = dev_name(dev);
edev_ctl->ctl_name = "llcc";
edev_ctl->panic_on_ue = LLCC_ERP_PANIC_ON_UE;
- edev_ctl->pvt_info = llcc_driv_data;
rc = edac_device_add_device(edev_ctl);
if (rc)
diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index 9c89f7d53e99..958aa4662ccb 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -819,8 +819,10 @@ static int ioctl_send_response(struct client *client, union ioctl_arg *arg)
r = container_of(resource, struct inbound_transaction_resource,
resource);
- if (is_fcp_request(r->request))
+ if (is_fcp_request(r->request)) {
+ kfree(r->data);
goto out;
+ }
if (a->length != fw_get_response_length(r->request)) {
ret = -EINVAL;
diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c
index f818d00bb2c6..ffdad59ec81f 100644
--- a/drivers/firmware/arm_scmi/driver.c
+++ b/drivers/firmware/arm_scmi/driver.c
@@ -910,6 +910,8 @@ static int do_xfer(const struct scmi_protocol_handle *ph,
xfer->hdr.protocol_id, xfer->hdr.seq,
xfer->hdr.poll_completion);
+ /* Clear any stale status */
+ xfer->hdr.status = SCMI_SUCCESS;
xfer->state = SCMI_XFER_SENT_OK;
/*
* Even though spinlocking is not needed here since no race is possible
diff --git a/drivers/firmware/arm_scmi/shmem.c b/drivers/firmware/arm_scmi/shmem.c
index 1dfe534b8518..87b4f4d35f06 100644
--- a/drivers/firmware/arm_scmi/shmem.c
+++ b/drivers/firmware/arm_scmi/shmem.c
@@ -81,10 +81,11 @@ u32 shmem_read_header(struct scmi_shared_mem __iomem *shmem)
void shmem_fetch_response(struct scmi_shared_mem __iomem *shmem,
struct scmi_xfer *xfer)
{
+ size_t len = ioread32(&shmem->length);
+
xfer->hdr.status = ioread32(shmem->msg_payload);
/* Skip the length of header and status in shmem area i.e 8 bytes */
- xfer->rx.len = min_t(size_t, xfer->rx.len,
- ioread32(&shmem->length) - 8);
+ xfer->rx.len = min_t(size_t, xfer->rx.len, len > 8 ? len - 8 : 0);
/* Take a copy to the rx buffer.. */
memcpy_fromio(xfer->rx.buf, shmem->msg_payload + 4, xfer->rx.len);
@@ -93,8 +94,10 @@ void shmem_fetch_response(struct scmi_shared_mem __iomem *shmem,
void shmem_fetch_notification(struct scmi_shared_mem __iomem *shmem,
size_t max_len, struct scmi_xfer *xfer)
{
+ size_t len = ioread32(&shmem->length);
+
/* Skip only the length of header in shmem area i.e 4 bytes */
- xfer->rx.len = min_t(size_t, max_len, ioread32(&shmem->length) - 4);
+ xfer->rx.len = min_t(size_t, max_len, len > 4 ? len - 4 : 0);
/* Take a copy to the rx buffer.. */
memcpy_fromio(xfer->rx.buf, shmem->msg_payload, xfer->rx.len);
diff --git a/drivers/firmware/arm_scmi/virtio.c b/drivers/firmware/arm_scmi/virtio.c
index 33c9b81a55cd..1db975c08896 100644
--- a/drivers/firmware/arm_scmi/virtio.c
+++ b/drivers/firmware/arm_scmi/virtio.c
@@ -160,7 +160,6 @@ static void scmi_vio_channel_cleanup_sync(struct scmi_vio_channel *vioch)
}
vioch->shutdown_done = &vioch_shutdown_done;
- virtio_break_device(vioch->vqueue->vdev);
if (!vioch->is_rx && vioch->deferred_tx_wq)
/* Cannot be kicked anymore after this...*/
vioch->deferred_tx_wq = NULL;
@@ -482,6 +481,12 @@ static int virtio_chan_free(int id, void *p, void *data)
struct scmi_chan_info *cinfo = p;
struct scmi_vio_channel *vioch = cinfo->transport_info;
+ /*
+ * Break device to inhibit further traffic flowing while shutting down
+ * the channels: doing it later holding vioch->lock creates unsafe
+ * locking dependency chains as reported by LOCKDEP.
+ */
+ virtio_break_device(vioch->vqueue->vdev);
scmi_vio_channel_cleanup_sync(vioch);
scmi_free_channel(cinfo, data, id);
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 09716eebe8ac..1e0b016fdc2b 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -394,8 +394,8 @@ static int __init efisubsys_init(void)
efi_kobj = kobject_create_and_add("efi", firmware_kobj);
if (!efi_kobj) {
pr_err("efi: Firmware registration failed.\n");
- destroy_workqueue(efi_rts_wq);
- return -ENOMEM;
+ error = -ENOMEM;
+ goto err_destroy_wq;
}
if (efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE |
@@ -443,7 +443,10 @@ err_unregister:
err_put:
kobject_put(efi_kobj);
efi_kobj = NULL;
- destroy_workqueue(efi_rts_wq);
+err_destroy_wq:
+ if (efi_rts_wq)
+ destroy_workqueue(efi_rts_wq);
+
return error;
}
@@ -1004,6 +1007,8 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size)
/* first try to find a slot in an existing linked list entry */
for (prsv = efi_memreserve_root->next; prsv; ) {
rsv = memremap(prsv, sizeof(*rsv), MEMREMAP_WB);
+ if (!rsv)
+ return -ENOMEM;
index = atomic_fetch_add_unless(&rsv->count, 1, rsv->size);
if (index < rsv->size) {
rsv->entry[index].base = addr;
diff --git a/drivers/firmware/efi/libstub/arm64.c b/drivers/firmware/efi/libstub/arm64.c
index ff2d18c42ee7..4501652e11ab 100644
--- a/drivers/firmware/efi/libstub/arm64.c
+++ b/drivers/firmware/efi/libstub/arm64.c
@@ -19,10 +19,13 @@ static bool system_needs_vamap(void)
const u8 *type1_family = efi_get_smbios_string(1, family);
/*
- * Ampere Altra machines crash in SetTime() if SetVirtualAddressMap()
- * has not been called prior.
+ * Ampere eMAG, Altra, and Altra Max machines crash in SetTime() if
+ * SetVirtualAddressMap() has not been called prior.
*/
- if (!type1_family || strcmp(type1_family, "Altra"))
+ if (!type1_family || (
+ strcmp(type1_family, "eMAG") &&
+ strcmp(type1_family, "Altra") &&
+ strcmp(type1_family, "Altra Max")))
return false;
efi_warn("Working around broken SetVirtualAddressMap()\n");
diff --git a/drivers/firmware/efi/memattr.c b/drivers/firmware/efi/memattr.c
index 0a9aba5f9cef..f178b2984dfb 100644
--- a/drivers/firmware/efi/memattr.c
+++ b/drivers/firmware/efi/memattr.c
@@ -33,7 +33,7 @@ int __init efi_memattr_init(void)
return -ENOMEM;
}
- if (tbl->version > 1) {
+ if (tbl->version > 2) {
pr_warn("Unexpected EFI Memory Attributes table version %d\n",
tbl->version);
goto unmap;
diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
index 7feee3d9c2bf..1fba4e09cdcf 100644
--- a/drivers/firmware/efi/runtime-wrappers.c
+++ b/drivers/firmware/efi/runtime-wrappers.c
@@ -62,6 +62,7 @@ struct efi_runtime_work efi_rts_work;
\
if (!efi_enabled(EFI_RUNTIME_SERVICES)) { \
pr_warn_once("EFI Runtime Services are disabled!\n"); \
+ efi_rts_work.status = EFI_DEVICE_ERROR; \
goto exit; \
} \
\
diff --git a/drivers/firmware/google/coreboot_table.c b/drivers/firmware/google/coreboot_table.c
index 2652c396c423..33ae94745aef 100644
--- a/drivers/firmware/google/coreboot_table.c
+++ b/drivers/firmware/google/coreboot_table.c
@@ -93,14 +93,19 @@ static int coreboot_table_populate(struct device *dev, void *ptr)
for (i = 0; i < header->table_entries; i++) {
entry = ptr_entry;
- device = kzalloc(sizeof(struct device) + entry->size, GFP_KERNEL);
+ if (entry->size < sizeof(*entry)) {
+ dev_warn(dev, "coreboot table entry too small!\n");
+ return -EINVAL;
+ }
+
+ device = kzalloc(sizeof(device->dev) + entry->size, GFP_KERNEL);
if (!device)
return -ENOMEM;
device->dev.parent = dev;
device->dev.bus = &coreboot_bus_type;
device->dev.release = coreboot_device_release;
- memcpy(&device->entry, ptr_entry, entry->size);
+ memcpy(device->raw, ptr_entry, entry->size);
switch (device->entry.tag) {
case LB_TAG_CBMEM_ENTRY:
diff --git a/drivers/firmware/google/coreboot_table.h b/drivers/firmware/google/coreboot_table.h
index 37f4d335a606..d814dca33a08 100644
--- a/drivers/firmware/google/coreboot_table.h
+++ b/drivers/firmware/google/coreboot_table.h
@@ -79,6 +79,7 @@ struct coreboot_device {
struct lb_cbmem_ref cbmem_ref;
struct lb_cbmem_entry cbmem_entry;
struct lb_framebuffer framebuffer;
+ DECLARE_FLEX_ARRAY(u8, raw);
};
};
diff --git a/drivers/firmware/google/gsmi.c b/drivers/firmware/google/gsmi.c
index 4e2575dfeb90..871bedf533a8 100644
--- a/drivers/firmware/google/gsmi.c
+++ b/drivers/firmware/google/gsmi.c
@@ -361,9 +361,10 @@ static efi_status_t gsmi_get_variable(efi_char16_t *name,
memcpy(data, gsmi_dev.data_buf->start, *data_size);
/* All variables are have the following attributes */
- *attr = EFI_VARIABLE_NON_VOLATILE |
- EFI_VARIABLE_BOOTSERVICE_ACCESS |
- EFI_VARIABLE_RUNTIME_ACCESS;
+ if (attr)
+ *attr = EFI_VARIABLE_NON_VOLATILE |
+ EFI_VARIABLE_BOOTSERVICE_ACCESS |
+ EFI_VARIABLE_RUNTIME_ACCESS;
}
spin_unlock_irqrestore(&gsmi_dev.lock, flags);
diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c
index e7bcfca4159f..447ee4ea5c90 100644
--- a/drivers/firmware/psci/psci.c
+++ b/drivers/firmware/psci/psci.c
@@ -440,6 +440,9 @@ static const struct file_operations psci_debugfs_ops = {
static int __init psci_debugfs_init(void)
{
+ if (!invoke_psci_fn || !psci_ops.get_version)
+ return 0;
+
return PTR_ERR_OR_ZERO(debugfs_create_file("psci", 0444, NULL, NULL,
&psci_debugfs_ops));
}
diff --git a/drivers/fpga/intel-m10-bmc-sec-update.c b/drivers/fpga/intel-m10-bmc-sec-update.c
index 79d48852825e..03f1bd81c434 100644
--- a/drivers/fpga/intel-m10-bmc-sec-update.c
+++ b/drivers/fpga/intel-m10-bmc-sec-update.c
@@ -574,20 +574,27 @@ static int m10bmc_sec_probe(struct platform_device *pdev)
len = scnprintf(buf, SEC_UPDATE_LEN_MAX, "secure-update%d",
sec->fw_name_id);
sec->fw_name = kmemdup_nul(buf, len, GFP_KERNEL);
- if (!sec->fw_name)
- return -ENOMEM;
+ if (!sec->fw_name) {
+ ret = -ENOMEM;
+ goto fw_name_fail;
+ }
fwl = firmware_upload_register(THIS_MODULE, sec->dev, sec->fw_name,
&m10bmc_ops, sec);
if (IS_ERR(fwl)) {
dev_err(sec->dev, "Firmware Upload driver failed to start\n");
- kfree(sec->fw_name);
- xa_erase(&fw_upload_xa, sec->fw_name_id);
- return PTR_ERR(fwl);
+ ret = PTR_ERR(fwl);
+ goto fw_uploader_fail;
}
sec->fwl = fwl;
return 0;
+
+fw_uploader_fail:
+ kfree(sec->fw_name);
+fw_name_fail:
+ xa_erase(&fw_upload_xa, sec->fw_name_id);
+ return ret;
}
static int m10bmc_sec_remove(struct platform_device *pdev)
diff --git a/drivers/fpga/stratix10-soc.c b/drivers/fpga/stratix10-soc.c
index 357cea58ec98..f7f01982a512 100644
--- a/drivers/fpga/stratix10-soc.c
+++ b/drivers/fpga/stratix10-soc.c
@@ -213,9 +213,9 @@ static int s10_ops_write_init(struct fpga_manager *mgr,
/* Allocate buffers from the service layer's pool. */
for (i = 0; i < NUM_SVC_BUFS; i++) {
kbuf = stratix10_svc_allocate_memory(priv->chan, SVC_BUF_SIZE);
- if (!kbuf) {
+ if (IS_ERR(kbuf)) {
s10_free_buffers(mgr);
- ret = -ENOMEM;
+ ret = PTR_ERR(kbuf);
goto init_done;
}
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index ec7cfd4f52b1..e9917a45b005 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -1531,6 +1531,7 @@ config GPIO_MLXBF2
tristate "Mellanox BlueField 2 SoC GPIO"
depends on (MELLANOX_PLATFORM && ARM64 && ACPI) || (64BIT && COMPILE_TEST)
select GPIO_GENERIC
+ select GPIOLIB_IRQCHIP
help
Say Y here if you want GPIO support on Mellanox BlueField 2 SoC.
diff --git a/drivers/gpio/gpio-eic-sprd.c b/drivers/gpio/gpio-eic-sprd.c
index 8d722e026e9c..84352a6f4973 100644
--- a/drivers/gpio/gpio-eic-sprd.c
+++ b/drivers/gpio/gpio-eic-sprd.c
@@ -91,7 +91,6 @@ enum sprd_eic_type {
struct sprd_eic {
struct gpio_chip chip;
- struct irq_chip intc;
void __iomem *base[SPRD_EIC_MAX_BANK];
enum sprd_eic_type type;
spinlock_t lock;
@@ -255,6 +254,8 @@ static void sprd_eic_irq_mask(struct irq_data *data)
default:
dev_err(chip->parent, "Unsupported EIC type.\n");
}
+
+ gpiochip_disable_irq(chip, offset);
}
static void sprd_eic_irq_unmask(struct irq_data *data)
@@ -263,6 +264,8 @@ static void sprd_eic_irq_unmask(struct irq_data *data)
struct sprd_eic *sprd_eic = gpiochip_get_data(chip);
u32 offset = irqd_to_hwirq(data);
+ gpiochip_enable_irq(chip, offset);
+
switch (sprd_eic->type) {
case SPRD_EIC_DEBOUNCE:
sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IE, 1);
@@ -564,6 +567,15 @@ static void sprd_eic_irq_handler(struct irq_desc *desc)
chained_irq_exit(ic, desc);
}
+static const struct irq_chip sprd_eic_irq = {
+ .name = "sprd-eic",
+ .irq_ack = sprd_eic_irq_ack,
+ .irq_mask = sprd_eic_irq_mask,
+ .irq_unmask = sprd_eic_irq_unmask,
+ .irq_set_type = sprd_eic_irq_set_type,
+ .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_IMMUTABLE,
+ GPIOCHIP_IRQ_RESOURCE_HELPERS,
+};
static int sprd_eic_probe(struct platform_device *pdev)
{
const struct sprd_eic_variant_data *pdata;
@@ -626,15 +638,8 @@ static int sprd_eic_probe(struct platform_device *pdev)
break;
}
- sprd_eic->intc.name = dev_name(&pdev->dev);
- sprd_eic->intc.irq_ack = sprd_eic_irq_ack;
- sprd_eic->intc.irq_mask = sprd_eic_irq_mask;
- sprd_eic->intc.irq_unmask = sprd_eic_irq_unmask;
- sprd_eic->intc.irq_set_type = sprd_eic_irq_set_type;
- sprd_eic->intc.flags = IRQCHIP_SKIP_SET_WAKE;
-
irq = &sprd_eic->chip.irq;
- irq->chip = &sprd_eic->intc;
+ gpio_irq_chip_set_chip(irq, &sprd_eic_irq);
irq->handler = handle_bad_irq;
irq->default_type = IRQ_TYPE_NONE;
irq->parent_handler = sprd_eic_irq_handler;
diff --git a/drivers/gpio/gpio-ep93xx.c b/drivers/gpio/gpio-ep93xx.c
index 2e1779709113..6cedf46efec6 100644
--- a/drivers/gpio/gpio-ep93xx.c
+++ b/drivers/gpio/gpio-ep93xx.c
@@ -17,6 +17,7 @@
#include <linux/slab.h>
#include <linux/gpio/driver.h>
#include <linux/bitops.h>
+#include <linux/seq_file.h>
#define EP93XX_GPIO_F_INT_STATUS 0x5c
#define EP93XX_GPIO_A_INT_STATUS 0xa0
@@ -40,7 +41,6 @@
#define EP93XX_GPIO_F_IRQ_BASE 80
struct ep93xx_gpio_irq_chip {
- struct irq_chip ic;
u8 irq_offset;
u8 int_unmasked;
u8 int_enabled;
@@ -148,7 +148,7 @@ static void ep93xx_gpio_f_irq_handler(struct irq_desc *desc)
*/
struct irq_chip *irqchip = irq_desc_get_chip(desc);
unsigned int irq = irq_desc_get_irq(desc);
- int port_f_idx = ((irq + 1) & 7) ^ 4; /* {19..22,47..50} -> {0..7} */
+ int port_f_idx = (irq & 7) ^ 4; /* {20..23,48..51} -> {0..7} */
int gpio_irq = EP93XX_GPIO_F_IRQ_BASE + port_f_idx;
chained_irq_enter(irqchip, desc);
@@ -185,6 +185,7 @@ static void ep93xx_gpio_irq_mask_ack(struct irq_data *d)
ep93xx_gpio_update_int_params(epg, eic);
writeb(port_mask, epg->base + eic->irq_offset + EP93XX_INT_EOI_OFFSET);
+ gpiochip_disable_irq(gc, irqd_to_hwirq(d));
}
static void ep93xx_gpio_irq_mask(struct irq_data *d)
@@ -195,6 +196,7 @@ static void ep93xx_gpio_irq_mask(struct irq_data *d)
eic->int_unmasked &= ~BIT(d->irq & 7);
ep93xx_gpio_update_int_params(epg, eic);
+ gpiochip_disable_irq(gc, irqd_to_hwirq(d));
}
static void ep93xx_gpio_irq_unmask(struct irq_data *d)
@@ -203,6 +205,7 @@ static void ep93xx_gpio_irq_unmask(struct irq_data *d)
struct ep93xx_gpio_irq_chip *eic = to_ep93xx_gpio_irq_chip(gc);
struct ep93xx_gpio *epg = gpiochip_get_data(gc);
+ gpiochip_enable_irq(gc, irqd_to_hwirq(d));
eic->int_unmasked |= BIT(d->irq & 7);
ep93xx_gpio_update_int_params(epg, eic);
}
@@ -320,15 +323,25 @@ static int ep93xx_gpio_set_config(struct gpio_chip *gc, unsigned offset,
return 0;
}
-static void ep93xx_init_irq_chip(struct device *dev, struct irq_chip *ic)
+static void ep93xx_irq_print_chip(struct irq_data *data, struct seq_file *p)
{
- ic->irq_ack = ep93xx_gpio_irq_ack;
- ic->irq_mask_ack = ep93xx_gpio_irq_mask_ack;
- ic->irq_mask = ep93xx_gpio_irq_mask;
- ic->irq_unmask = ep93xx_gpio_irq_unmask;
- ic->irq_set_type = ep93xx_gpio_irq_type;
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(data);
+
+ seq_printf(p, dev_name(gc->parent));
}
+static const struct irq_chip gpio_eic_irq_chip = {
+ .name = "ep93xx-gpio-eic",
+ .irq_ack = ep93xx_gpio_irq_ack,
+ .irq_mask = ep93xx_gpio_irq_mask,
+ .irq_unmask = ep93xx_gpio_irq_unmask,
+ .irq_mask_ack = ep93xx_gpio_irq_mask_ack,
+ .irq_set_type = ep93xx_gpio_irq_type,
+ .irq_print_chip = ep93xx_irq_print_chip,
+ .flags = IRQCHIP_IMMUTABLE,
+ GPIOCHIP_IRQ_RESOURCE_HELPERS,
+};
+
static int ep93xx_gpio_add_bank(struct ep93xx_gpio_chip *egc,
struct platform_device *pdev,
struct ep93xx_gpio *epg,
@@ -350,8 +363,6 @@ static int ep93xx_gpio_add_bank(struct ep93xx_gpio_chip *egc,
girq = &gc->irq;
if (bank->has_irq || bank->has_hierarchical_irq) {
- struct irq_chip *ic;
-
gc->set_config = ep93xx_gpio_set_config;
egc->eic = devm_kcalloc(dev, 1,
sizeof(*egc->eic),
@@ -359,12 +370,7 @@ static int ep93xx_gpio_add_bank(struct ep93xx_gpio_chip *egc,
if (!egc->eic)
return -ENOMEM;
egc->eic->irq_offset = bank->irq;
- ic = &egc->eic->ic;
- ic->name = devm_kasprintf(dev, GFP_KERNEL, "gpio-irq-%s", bank->label);
- if (!ic->name)
- return -ENOMEM;
- ep93xx_init_irq_chip(dev, ic);
- girq->chip = ic;
+ gpio_irq_chip_set_chip(girq, &gpio_eic_irq_chip);
}
if (bank->has_irq) {
diff --git a/drivers/gpio/gpio-mxc.c b/drivers/gpio/gpio-mxc.c
index d5626c572d24..9d0cec4b82a3 100644
--- a/drivers/gpio/gpio-mxc.c
+++ b/drivers/gpio/gpio-mxc.c
@@ -18,6 +18,7 @@
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
+#include <linux/spinlock.h>
#include <linux/syscore_ops.h>
#include <linux/gpio/driver.h>
#include <linux/of.h>
@@ -159,6 +160,7 @@ static int gpio_set_irq_type(struct irq_data *d, u32 type)
{
struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
struct mxc_gpio_port *port = gc->private;
+ unsigned long flags;
u32 bit, val;
u32 gpio_idx = d->hwirq;
int edge;
@@ -197,6 +199,8 @@ static int gpio_set_irq_type(struct irq_data *d, u32 type)
return -EINVAL;
}
+ raw_spin_lock_irqsave(&port->gc.bgpio_lock, flags);
+
if (GPIO_EDGE_SEL >= 0) {
val = readl(port->base + GPIO_EDGE_SEL);
if (edge == GPIO_INT_BOTH_EDGES)
@@ -217,15 +221,20 @@ static int gpio_set_irq_type(struct irq_data *d, u32 type)
writel(1 << gpio_idx, port->base + GPIO_ISR);
port->pad_type[gpio_idx] = type;
- return 0;
+ raw_spin_unlock_irqrestore(&port->gc.bgpio_lock, flags);
+
+ return port->gc.direction_input(&port->gc, gpio_idx);
}
static void mxc_flip_edge(struct mxc_gpio_port *port, u32 gpio)
{
void __iomem *reg = port->base;
+ unsigned long flags;
u32 bit, val;
int edge;
+ raw_spin_lock_irqsave(&port->gc.bgpio_lock, flags);
+
reg += GPIO_ICR1 + ((gpio & 0x10) >> 2); /* lower or upper register */
bit = gpio & 0xf;
val = readl(reg);
@@ -240,9 +249,12 @@ static void mxc_flip_edge(struct mxc_gpio_port *port, u32 gpio)
} else {
pr_err("mxc: invalid configuration for GPIO %d: %x\n",
gpio, edge);
- return;
+ goto unlock;
}
writel(val | (edge << (bit << 1)), reg);
+
+unlock:
+ raw_spin_unlock_irqrestore(&port->gc.bgpio_lock, flags);
}
/* handle 32 interrupts in one status register */
diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index a59d61cd44b2..5299e5bb76d6 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -474,6 +474,9 @@ static u8 pcal6534_recalc_addr(struct pca953x_chip *chip, int reg, int off)
case PCAL6524_DEBOUNCE:
pinctrl = ((reg & PCAL_PINCTRL_MASK) >> 1) + 0x1c;
break;
+ default:
+ pinctrl = 0;
+ break;
}
return pinctrl + addr + (off / BANK_SZ);
diff --git a/drivers/gpio/gpio-pmic-eic-sprd.c b/drivers/gpio/gpio-pmic-eic-sprd.c
index e518490c4b68..c3e4d90f6b18 100644
--- a/drivers/gpio/gpio-pmic-eic-sprd.c
+++ b/drivers/gpio/gpio-pmic-eic-sprd.c
@@ -47,7 +47,6 @@ enum {
/**
* struct sprd_pmic_eic - PMIC EIC controller
* @chip: the gpio_chip structure.
- * @intc: the irq_chip structure.
* @map: the regmap from the parent device.
* @offset: the EIC controller's offset address of the PMIC.
* @reg: the array to cache the EIC registers.
@@ -56,7 +55,6 @@ enum {
*/
struct sprd_pmic_eic {
struct gpio_chip chip;
- struct irq_chip intc;
struct regmap *map;
u32 offset;
u8 reg[CACHE_NR_REGS];
@@ -151,15 +149,21 @@ static void sprd_pmic_eic_irq_mask(struct irq_data *data)
{
struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
struct sprd_pmic_eic *pmic_eic = gpiochip_get_data(chip);
+ u32 offset = irqd_to_hwirq(data);
pmic_eic->reg[REG_IE] = 0;
pmic_eic->reg[REG_TRIG] = 0;
+
+ gpiochip_disable_irq(chip, offset);
}
static void sprd_pmic_eic_irq_unmask(struct irq_data *data)
{
struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
struct sprd_pmic_eic *pmic_eic = gpiochip_get_data(chip);
+ u32 offset = irqd_to_hwirq(data);
+
+ gpiochip_enable_irq(chip, offset);
pmic_eic->reg[REG_IE] = 1;
pmic_eic->reg[REG_TRIG] = 1;
@@ -292,6 +296,17 @@ static irqreturn_t sprd_pmic_eic_irq_handler(int irq, void *data)
return IRQ_HANDLED;
}
+static const struct irq_chip pmic_eic_irq_chip = {
+ .name = "sprd-pmic-eic",
+ .irq_mask = sprd_pmic_eic_irq_mask,
+ .irq_unmask = sprd_pmic_eic_irq_unmask,
+ .irq_set_type = sprd_pmic_eic_irq_set_type,
+ .irq_bus_lock = sprd_pmic_eic_bus_lock,
+ .irq_bus_sync_unlock = sprd_pmic_eic_bus_sync_unlock,
+ .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_IMMUTABLE,
+ GPIOCHIP_IRQ_RESOURCE_HELPERS,
+};
+
static int sprd_pmic_eic_probe(struct platform_device *pdev)
{
struct gpio_irq_chip *irq;
@@ -338,16 +353,8 @@ static int sprd_pmic_eic_probe(struct platform_device *pdev)
pmic_eic->chip.set = sprd_pmic_eic_set;
pmic_eic->chip.get = sprd_pmic_eic_get;
- pmic_eic->intc.name = dev_name(&pdev->dev);
- pmic_eic->intc.irq_mask = sprd_pmic_eic_irq_mask;
- pmic_eic->intc.irq_unmask = sprd_pmic_eic_irq_unmask;
- pmic_eic->intc.irq_set_type = sprd_pmic_eic_irq_set_type;
- pmic_eic->intc.irq_bus_lock = sprd_pmic_eic_bus_lock;
- pmic_eic->intc.irq_bus_sync_unlock = sprd_pmic_eic_bus_sync_unlock;
- pmic_eic->intc.flags = IRQCHIP_SKIP_SET_WAKE;
-
irq = &pmic_eic->chip.irq;
- irq->chip = &pmic_eic->intc;
+ gpio_irq_chip_set_chip(irq, &pmic_eic_irq_chip);
irq->threaded = true;
ret = devm_gpiochip_add_data(&pdev->dev, &pmic_eic->chip, pmic_eic);
diff --git a/drivers/gpio/gpio-sifive.c b/drivers/gpio/gpio-sifive.c
index 238f3210970c..bc5660f61c57 100644
--- a/drivers/gpio/gpio-sifive.c
+++ b/drivers/gpio/gpio-sifive.c
@@ -215,6 +215,7 @@ static int sifive_gpio_probe(struct platform_device *pdev)
return -ENODEV;
}
parent = irq_find_host(irq_parent);
+ of_node_put(irq_parent);
if (!parent) {
dev_err(dev, "no IRQ parent domain\n");
return -ENODEV;
diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c
index 60514bc5454f..9e3893b19e4f 100644
--- a/drivers/gpio/gpio-sim.c
+++ b/drivers/gpio/gpio-sim.c
@@ -736,7 +736,7 @@ static void gpio_sim_remove_hogs(struct gpio_sim_device *dev)
gpiod_remove_hogs(dev->hogs);
- for (hog = dev->hogs; !hog->chip_label; hog++) {
+ for (hog = dev->hogs; hog->chip_label; hog++) {
kfree(hog->chip_label);
kfree(hog->line_name);
}
diff --git a/drivers/gpio/gpio-sprd.c b/drivers/gpio/gpio-sprd.c
index 9bff63990eee..072b4e653216 100644
--- a/drivers/gpio/gpio-sprd.c
+++ b/drivers/gpio/gpio-sprd.c
@@ -120,6 +120,7 @@ static void sprd_gpio_irq_mask(struct irq_data *data)
u32 offset = irqd_to_hwirq(data);
sprd_gpio_update(chip, offset, SPRD_GPIO_IE, 0);
+ gpiochip_disable_irq(chip, offset);
}
static void sprd_gpio_irq_ack(struct irq_data *data)
@@ -136,6 +137,7 @@ static void sprd_gpio_irq_unmask(struct irq_data *data)
u32 offset = irqd_to_hwirq(data);
sprd_gpio_update(chip, offset, SPRD_GPIO_IE, 1);
+ gpiochip_enable_irq(chip, offset);
}
static int sprd_gpio_irq_set_type(struct irq_data *data,
@@ -205,13 +207,14 @@ static void sprd_gpio_irq_handler(struct irq_desc *desc)
chained_irq_exit(ic, desc);
}
-static struct irq_chip sprd_gpio_irqchip = {
+static const struct irq_chip sprd_gpio_irqchip = {
.name = "sprd-gpio",
.irq_ack = sprd_gpio_irq_ack,
.irq_mask = sprd_gpio_irq_mask,
.irq_unmask = sprd_gpio_irq_unmask,
.irq_set_type = sprd_gpio_irq_set_type,
- .flags = IRQCHIP_SKIP_SET_WAKE,
+ .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_IMMUTABLE,
+ GPIOCHIP_IRQ_RESOURCE_HELPERS,
};
static int sprd_gpio_probe(struct platform_device *pdev)
@@ -245,7 +248,7 @@ static int sprd_gpio_probe(struct platform_device *pdev)
sprd_gpio->chip.direction_output = sprd_gpio_direction_output;
irq = &sprd_gpio->chip.irq;
- irq->chip = &sprd_gpio_irqchip;
+ gpio_irq_chip_set_chip(irq, &sprd_gpio_irqchip);
irq->handler = handle_bad_irq;
irq->default_type = IRQ_TYPE_NONE;
irq->parent_handler = sprd_gpio_irq_handler;
diff --git a/drivers/gpio/gpio-vf610.c b/drivers/gpio/gpio-vf610.c
index 9db42f6a2043..9033db00c360 100644
--- a/drivers/gpio/gpio-vf610.c
+++ b/drivers/gpio/gpio-vf610.c
@@ -30,7 +30,6 @@ struct fsl_gpio_soc_data {
struct vf610_gpio_port {
struct gpio_chip gc;
- struct irq_chip ic;
void __iomem *base;
void __iomem *gpio_base;
const struct fsl_gpio_soc_data *sdata;
@@ -207,20 +206,24 @@ static int vf610_gpio_irq_set_type(struct irq_data *d, u32 type)
static void vf610_gpio_irq_mask(struct irq_data *d)
{
- struct vf610_gpio_port *port =
- gpiochip_get_data(irq_data_get_irq_chip_data(d));
- void __iomem *pcr_base = port->base + PORT_PCR(d->hwirq);
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+ struct vf610_gpio_port *port = gpiochip_get_data(gc);
+ irq_hw_number_t gpio_num = irqd_to_hwirq(d);
+ void __iomem *pcr_base = port->base + PORT_PCR(gpio_num);
vf610_gpio_writel(0, pcr_base);
+ gpiochip_disable_irq(gc, gpio_num);
}
static void vf610_gpio_irq_unmask(struct irq_data *d)
{
- struct vf610_gpio_port *port =
- gpiochip_get_data(irq_data_get_irq_chip_data(d));
- void __iomem *pcr_base = port->base + PORT_PCR(d->hwirq);
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+ struct vf610_gpio_port *port = gpiochip_get_data(gc);
+ irq_hw_number_t gpio_num = irqd_to_hwirq(d);
+ void __iomem *pcr_base = port->base + PORT_PCR(gpio_num);
- vf610_gpio_writel(port->irqc[d->hwirq] << PORT_PCR_IRQC_OFFSET,
+ gpiochip_enable_irq(gc, gpio_num);
+ vf610_gpio_writel(port->irqc[gpio_num] << PORT_PCR_IRQC_OFFSET,
pcr_base);
}
@@ -237,6 +240,17 @@ static int vf610_gpio_irq_set_wake(struct irq_data *d, u32 enable)
return 0;
}
+static const struct irq_chip vf610_irqchip = {
+ .name = "gpio-vf610",
+ .irq_ack = vf610_gpio_irq_ack,
+ .irq_mask = vf610_gpio_irq_mask,
+ .irq_unmask = vf610_gpio_irq_unmask,
+ .irq_set_type = vf610_gpio_irq_set_type,
+ .irq_set_wake = vf610_gpio_irq_set_wake,
+ .flags = IRQCHIP_IMMUTABLE,
+ GPIOCHIP_IRQ_RESOURCE_HELPERS,
+};
+
static void vf610_gpio_disable_clk(void *data)
{
clk_disable_unprepare(data);
@@ -249,7 +263,6 @@ static int vf610_gpio_probe(struct platform_device *pdev)
struct vf610_gpio_port *port;
struct gpio_chip *gc;
struct gpio_irq_chip *girq;
- struct irq_chip *ic;
int i;
int ret;
@@ -315,14 +328,6 @@ static int vf610_gpio_probe(struct platform_device *pdev)
gc->direction_output = vf610_gpio_direction_output;
gc->set = vf610_gpio_set;
- ic = &port->ic;
- ic->name = "gpio-vf610";
- ic->irq_ack = vf610_gpio_irq_ack;
- ic->irq_mask = vf610_gpio_irq_mask;
- ic->irq_unmask = vf610_gpio_irq_unmask;
- ic->irq_set_type = vf610_gpio_irq_set_type;
- ic->irq_set_wake = vf610_gpio_irq_set_wake;
-
/* Mask all GPIO interrupts */
for (i = 0; i < gc->ngpio; i++)
vf610_gpio_writel(0, port->base + PORT_PCR(i));
@@ -331,7 +336,7 @@ static int vf610_gpio_probe(struct platform_device *pdev)
vf610_gpio_writel(~0, port->base + PORT_ISFR);
girq = &gc->irq;
- girq->chip = ic;
+ gpio_irq_chip_set_chip(girq, &vf610_irqchip);
girq->parent_handler = vf610_gpio_irq_handler;
girq->num_parents = 1;
girq->parents = devm_kcalloc(&pdev->dev, 1,
diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index bed0380c5136..34ff048e70d0 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -385,7 +385,7 @@ err:
}
static bool acpi_gpio_irq_is_wake(struct device *parent,
- struct acpi_resource_gpio *agpio)
+ const struct acpi_resource_gpio *agpio)
{
unsigned int pin = agpio->pin_table[0];
@@ -778,7 +778,7 @@ static int acpi_populate_gpio_lookup(struct acpi_resource *ares, void *data)
lookup->info.pin_config = agpio->pin_config;
lookup->info.debounce = agpio->debounce_timeout;
lookup->info.gpioint = gpioint;
- lookup->info.wake_capable = agpio->wake_capable == ACPI_WAKE_CAPABLE;
+ lookup->info.wake_capable = acpi_gpio_irq_is_wake(&lookup->info.adev->dev, agpio);
/*
* Polarity and triggering are only specified for GpioInt
@@ -1104,7 +1104,8 @@ int acpi_dev_gpio_irq_wake_get_by(struct acpi_device *adev, const char *name, in
dev_dbg(&adev->dev, "IRQ %d already in use\n", irq);
}
- if (wake_capable)
+ /* avoid suspend issues with GPIOs when systems are using S3 */
+ if (wake_capable && acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)
*wake_capable = info.wake_capable;
return irq;
@@ -1623,6 +1624,31 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] __initconst = {
.ignore_interrupt = "AMDI0030:00@18",
},
},
+ {
+ /*
+ * Spurious wakeups from TP_ATTN# pin
+ * Found in BIOS 1.7.8
+ * https://gitlab.freedesktop.org/drm/amd/-/issues/1722#note_1720627
+ */
+ .matches = {
+ DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"),
+ },
+ .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
+ .ignore_wake = "ELAN0415:00@9",
+ },
+ },
+ {
+ /*
+ * Spurious wakeups from TP_ATTN# pin
+ * Found in BIOS 1.7.7
+ */
+ .matches = {
+ DMI_MATCH(DMI_BOARD_NAME, "NH5xAx"),
+ },
+ .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
+ .ignore_wake = "SYNA1202:00@16",
+ },
+ },
{} /* Terminating entry */
};
diff --git a/drivers/gpio/gpiolib-acpi.h b/drivers/gpio/gpiolib-acpi.h
index 9475f99a9694..5a08693b8fb1 100644
--- a/drivers/gpio/gpiolib-acpi.h
+++ b/drivers/gpio/gpiolib-acpi.h
@@ -14,7 +14,6 @@
#include <linux/gpio/consumer.h>
-struct acpi_device;
struct device;
struct fwnode_handle;
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 5a66d9616d7c..939c776b9488 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -3905,8 +3905,8 @@ static struct gpio_desc *gpiod_find_and_request(struct device *consumer,
const char *label,
bool platform_lookup_allowed)
{
+ unsigned long lookupflags = GPIO_LOOKUP_FLAGS_DEFAULT;
struct gpio_desc *desc = ERR_PTR(-ENOENT);
- unsigned long lookupflags;
int ret;
if (!IS_ERR_OR_NULL(fwnode))
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 315cbdf61979..9abfb482b615 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -53,7 +53,8 @@ config DRM_DEBUG_MM
config DRM_USE_DYNAMIC_DEBUG
bool "use dynamic debug to implement drm.debug"
- default y
+ default n
+ depends on BROKEN
depends on DRM
depends on DYNAMIC_DEBUG || DYNAMIC_DEBUG_CORE
depends on JUMP_LABEL
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 6b74df446694..d148a1bd85e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -195,6 +195,7 @@ extern int amdgpu_emu_mode;
extern uint amdgpu_smu_memory_pool_size;
extern int amdgpu_smu_pptable_id;
extern uint amdgpu_dc_feature_mask;
+extern uint amdgpu_freesync_vid_mode;
extern uint amdgpu_dc_debug_mask;
extern uint amdgpu_dc_visual_confirm;
extern uint amdgpu_dm_abm_level;
@@ -242,6 +243,7 @@ extern int amdgpu_num_kcq;
#define AMDGPU_VCNFW_LOG_SIZE (32 * 1024)
extern int amdgpu_vcnfw_log;
+extern int amdgpu_sg_display;
#define AMDGPU_VM_MAX_NUM_CTX 4096
#define AMDGPU_SG_THRESHOLD (256*1024*1024)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index b15091d8310d..3b5c53712d31 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2099,7 +2099,7 @@ int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_b
}
amdgpu_amdkfd_remove_eviction_fence(
- bo, bo->kfd_bo->process_info->eviction_fence);
+ bo, bo->vm_bo->vm->process_info->eviction_fence);
amdgpu_bo_unreserve(bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 8516c814bc9b..7af3041ccd0e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -61,6 +61,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
amdgpu_ctx_put(p->ctx);
return -ECANCELED;
}
+
+ amdgpu_sync_create(&p->sync);
return 0;
}
@@ -452,18 +454,6 @@ static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p,
}
r = amdgpu_sync_fence(&p->sync, fence);
- if (r)
- goto error;
-
- /*
- * When we have an explicit dependency it might be necessary to insert a
- * pipeline sync to make sure that all caches etc are flushed and the
- * next job actually sees the results from the previous one.
- */
- if (fence->context == p->gang_leader->base.entity->fence_context)
- r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence);
-
-error:
dma_fence_put(fence);
return r;
}
@@ -1188,10 +1178,19 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct drm_gpu_scheduler *sched;
struct amdgpu_bo_list_entry *e;
+ struct dma_fence *fence;
unsigned int i;
int r;
+ r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]);
+ if (r) {
+ if (r != -ERESTARTSYS)
+ DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
+ return r;
+ }
+
list_for_each_entry(e, &p->validated, tv.head) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
struct dma_resv *resv = bo->tbo.base.resv;
@@ -1211,10 +1210,27 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
return r;
}
- r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]);
- if (r && r != -ERESTARTSYS)
- DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
- return r;
+ sched = p->gang_leader->base.entity->rq->sched;
+ while ((fence = amdgpu_sync_get_fence(&p->sync))) {
+ struct drm_sched_fence *s_fence = to_drm_sched_fence(fence);
+
+ /*
+ * When we have an dependency it might be necessary to insert a
+ * pipeline sync to make sure that all caches etc are flushed and the
+ * next job actually sees the results from the previous one
+ * before we start executing on the same scheduler ring.
+ */
+ if (!s_fence || s_fence->sched != sched) {
+ dma_fence_put(fence);
+ continue;
+ }
+
+ r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence);
+ dma_fence_put(fence);
+ if (r)
+ return r;
+ }
+ return 0;
}
static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
@@ -1254,9 +1270,12 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
continue;
fence = &p->jobs[i]->base.s_fence->scheduled;
+ dma_fence_get(fence);
r = drm_sched_job_add_dependency(&leader->base, fence);
- if (r)
+ if (r) {
+ dma_fence_put(fence);
goto error_cleanup;
+ }
}
if (p->gang_size > 1) {
@@ -1344,6 +1363,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)
{
unsigned i;
+ amdgpu_sync_free(&parser->sync);
for (i = 0; i < parser->num_post_deps; i++) {
drm_syncobj_put(parser->post_deps[i].syncobj);
kfree(parser->post_deps[i].chain);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index afe6af9c0138..fbf2f24169eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -36,6 +36,7 @@
#include <generated/utsrelease.h>
#include <linux/pci-p2pdma.h>
+#include <drm/drm_aperture.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_probe_helper.h>
@@ -90,6 +91,8 @@ MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
#define AMDGPU_MAX_RETRY_LIMIT 2
#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
+static const struct drm_driver amdgpu_kms_driver;
+
const char *amdgpu_asic_name[] = {
"TAHITI",
"PITCAIRN",
@@ -3687,6 +3690,11 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (r)
return r;
+ /* Get rid of things like offb */
+ r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
+ if (r)
+ return r;
+
/* Enable TMZ based on IP_VERSION */
amdgpu_gmc_tmz_set(adev);
@@ -4260,6 +4268,9 @@ exit:
}
adev->in_suspend = false;
+ if (adev->enable_mes)
+ amdgpu_mes_self_test(adev);
+
if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
DRM_WARN("smart shift update failed\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index b4f2d61ea0d5..3fe277bc233f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -23,7 +23,6 @@
*/
#include <drm/amdgpu_drm.h>
-#include <drm/drm_aperture.h>
#include <drm/drm_drv.h>
#include <drm/drm_fbdev_generic.h>
#include <drm/drm_gem.h>
@@ -181,11 +180,13 @@ int amdgpu_mes_kiq;
int amdgpu_noretry = -1;
int amdgpu_force_asic_type = -1;
int amdgpu_tmz = -1; /* auto */
+uint amdgpu_freesync_vid_mode;
int amdgpu_reset_method = -1; /* auto */
int amdgpu_num_kcq = -1;
int amdgpu_smartshift_bias;
int amdgpu_use_xgmi_p2p = 1;
int amdgpu_vcnfw_log;
+int amdgpu_sg_display = -1; /* auto */
static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
@@ -880,6 +881,32 @@ MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto (default), 0 = off, 1 = on)
module_param_named(tmz, amdgpu_tmz, int, 0444);
/**
+ * DOC: freesync_video (uint)
+ * Enable the optimization to adjust front porch timing to achieve seamless
+ * mode change experience when setting a freesync supported mode for which full
+ * modeset is not needed.
+ *
+ * The Display Core will add a set of modes derived from the base FreeSync
+ * video mode into the corresponding connector's mode list based on commonly
+ * used refresh rates and VRR range of the connected display, when users enable
+ * this feature. From the userspace perspective, they can see a seamless mode
+ * change experience when the change between different refresh rates under the
+ * same resolution. Additionally, userspace applications such as Video playback
+ * can read this modeset list and change the refresh rate based on the video
+ * frame rate. Finally, the userspace can also derive an appropriate mode for a
+ * particular refresh rate based on the FreeSync Mode and add it to the
+ * connector's mode list.
+ *
+ * Note: This is an experimental feature.
+ *
+ * The default value: 0 (off).
+ */
+MODULE_PARM_DESC(
+ freesync_video,
+ "Enable freesync modesetting optimization feature (0 = off (default), 1 = on)");
+module_param_named(freesync_video, amdgpu_freesync_vid_mode, uint, 0444);
+
+/**
* DOC: reset_method (int)
* GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)
*/
@@ -906,6 +933,16 @@ MODULE_PARM_DESC(vcnfw_log, "Enable vcnfw log(0 = disable (default value), 1 = e
module_param_named(vcnfw_log, amdgpu_vcnfw_log, int, 0444);
/**
+ * DOC: sg_display (int)
+ * Disable S/G (scatter/gather) display (i.e., display from system memory).
+ * This option is only relevant on APUs. Set this option to 0 to disable
+ * S/G display if you experience flickering or other issues under memory
+ * pressure and report the issue.
+ */
+MODULE_PARM_DESC(sg_display, "S/G Display (-1 = auto (default), 0 = disable)");
+module_param_named(sg_display, amdgpu_sg_display, int, 0444);
+
+/**
* DOC: smu_pptable_id (int)
* Used to override pptable id. id = 0 use VBIOS pptable.
* id > 0 use the soft pptable with specicfied id.
@@ -2095,11 +2132,6 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
}
#endif
- /* Get rid of things like offb */
- ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &amdgpu_kms_driver);
- if (ret)
- return ret;
-
adev = devm_drm_dev_alloc(&pdev->dev, &amdgpu_kms_driver, typeof(*adev), ddev);
if (IS_ERR(adev))
return PTR_ERR(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 00444203220d..faff4a3f96e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -618,7 +618,13 @@ void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev)
if (!ring || !ring->fence_drv.initialized)
continue;
- if (!ring->no_scheduler)
+ /*
+ * Notice we check for sched.ops since there's some
+ * override on the meaning of sched.ready by amdgpu.
+ * The natural check would be sched.ready, which is
+ * set as drm_sched_init() finishes...
+ */
+ if (ring->sched.ops)
drm_sched_fini(&ring->sched);
for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 23692e5d4d13..3380daf42da8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -156,6 +156,9 @@ static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev)
return amdgpu_compute_multipipe == 1;
}
+ if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
+ return true;
+
/* FIXME: spreading the queues across pipes causes perf regressions
* on POLARIS11 compute workloads */
if (adev->asic_type == CHIP_POLARIS11)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index fcb711a11a5b..3f07b1a2ce47 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -497,6 +497,7 @@ void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
!--id_mgr->reserved_use_count) {
/* give the reserved ID back to normal round robin */
list_add(&id_mgr->reserved->list, &id_mgr->ids_lru);
+ id_mgr->reserved = NULL;
}
vm->reserved_vmid[vmhub] = false;
mutex_unlock(&id_mgr->lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 9e549923622b..c3d9d75143f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -161,8 +161,14 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
struct dma_fence *f;
unsigned i;
- /* use sched fence if available */
- f = job->base.s_fence ? &job->base.s_fence->finished : &job->hw_fence;
+ /* Check if any fences where initialized */
+ if (job->base.s_fence && job->base.s_fence->finished.ops)
+ f = &job->base.s_fence->finished;
+ else if (job->hw_fence.ops)
+ f = &job->hw_fence;
+ else
+ f = NULL;
+
for (i = 0; i < job->num_ibs; ++i)
amdgpu_ib_free(ring->adev, &job->ibs[i], f);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 4e684c2afc70..25a68d8888e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -470,8 +470,9 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev,
return true;
fail:
- DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size,
- man->size);
+ if (man)
+ DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size,
+ man->size);
return false;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index f752c7ae7f60..3989e755a5b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -295,7 +295,7 @@ struct amdgpu_ring {
#define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib)))
#define amdgpu_ring_patch_cs_in_place(r, p, job, ib) ((r)->funcs->patch_cs_in_place((p), (job), (ib)))
#define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
-#define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t))
+#define amdgpu_ring_test_ib(r, t) ((r)->funcs->test_ib ? (r)->funcs->test_ib((r), (t)) : 0)
#define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
#define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
#define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index bac7976975bd..dcd8c066bc1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -391,8 +391,10 @@ int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job)
dma_fence_get(f);
r = drm_sched_job_add_dependency(&job->base, f);
- if (r)
+ if (r) {
+ dma_fence_put(f);
return r;
+ }
}
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index b5f3bba851db..01e42bdd8e4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -974,7 +974,7 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
trace_amdgpu_vm_update_ptes(params, frag_start, upd_end,
min(nptes, 32u), dst, incr,
upd_flags,
- vm->task_info.pid,
+ vm->task_info.tgid,
vm->immediate.fence_context);
amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt),
cursor.level, pe_start, dst,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index faa12146635c..9fa1d814508a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -882,7 +882,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
kfree(rsv);
list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) {
- drm_buddy_free_list(&mgr->mm, &rsv->blocks);
+ drm_buddy_free_list(&mgr->mm, &rsv->allocated);
kfree(rsv);
}
drm_buddy_fini(&mgr->mm);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index a56c6e106d00..66eb102cd88f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -790,8 +790,8 @@ static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd,
* zero here */
WARN_ON(simd != 0);
- /* type 2 wave data */
- dst[(*no_fields)++] = 2;
+ /* type 3 wave data */
+ dst[(*no_fields)++] = 3;
dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
@@ -1287,10 +1287,8 @@ static int gfx_v11_0_sw_init(void *handle)
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(11, 0, 0):
- case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
- case IP_VERSION(11, 0, 4):
adev->gfx.me.num_me = 1;
adev->gfx.me.num_pipe_per_me = 1;
adev->gfx.me.num_queue_per_pipe = 1;
@@ -1298,6 +1296,15 @@ static int gfx_v11_0_sw_init(void *handle)
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 4;
break;
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ adev->gfx.me.num_me = 1;
+ adev->gfx.me.num_pipe_per_me = 1;
+ adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.mec.num_mec = 1;
+ adev->gfx.mec.num_pipe_per_mec = 4;
+ adev->gfx.mec.num_queue_per_pipe = 4;
+ break;
default:
adev->gfx.me.num_me = 1;
adev->gfx.me.num_pipe_per_me = 1;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index f202b45c413c..5dde6f82a1ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -6877,7 +6877,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
.test_ring = gfx_v9_0_ring_test_ring,
- .test_ib = gfx_v9_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_switch_buffer = gfx_v9_ring_emit_sb,
diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
index 95548c512f4f..077c53c6cc08 100644
--- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
@@ -35,6 +35,7 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_0_imu.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_imu.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_imu.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_imu.bin");
static int imu_v11_0_init_microcode(struct amdgpu_device *adev)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 970b066b37bb..1c4787000a5f 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -40,6 +40,8 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes1.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes1.bin");
static int mes_v11_0_hw_fini(void *handle);
static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev);
@@ -196,7 +198,6 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
mes_add_queue_pkt.tma_addr = input->tma_addr;
mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;
- mes_add_queue_pkt.trap_en = 1;
/* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */
mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
@@ -1343,7 +1344,7 @@ static int mes_v11_0_late_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* it's only intended for use in mes_self_test case, not for s0ix and reset */
- if (!amdgpu_in_reset(adev) && !adev->in_s0ix &&
+ if (!amdgpu_in_reset(adev) && !adev->in_s0ix && !adev->in_suspend &&
(adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3)))
amdgpu_mes_self_test(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c
index 15eb3658d70e..09fdcd20cb91 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c
@@ -337,7 +337,13 @@ const struct nbio_hdp_flush_reg nbio_v4_3_hdp_flush_reg = {
static void nbio_v4_3_init_registers(struct amdgpu_device *adev)
{
- return;
+ if (adev->ip_versions[NBIO_HWIP][0] == IP_VERSION(4, 3, 0)) {
+ uint32_t data;
+
+ data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2);
+ data &= ~RCC_DEV0_EPF2_STRAP2__STRAP_NO_SOFT_RESET_DEV0_F2_MASK;
+ WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2, data);
+ }
}
static u32 nbio_v4_3_get_rom_offset(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
index 5562670b7b52..7050238c4c48 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -640,7 +640,10 @@ static int soc21_common_early_init(void *handle)
AMD_CG_SUPPORT_GFX_CGCG |
AMD_CG_SUPPORT_GFX_CGLS |
AMD_CG_SUPPORT_REPEATER_FGCG |
- AMD_CG_SUPPORT_GFX_MGCG;
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_HDP_SD |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS;
adev->pg_flags = AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_VCN_DPG |
AMD_PG_SUPPORT_JPEG;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index ecb4c3abc629..c06ada0844ba 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -200,7 +200,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
queue_input.wptr_addr = (uint64_t)q->properties.write_ptr;
if (q->wptr_bo) {
- wptr_addr_off = (uint64_t)q->properties.write_ptr - (uint64_t)q->wptr_bo->kfd_bo->va;
+ wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1);
queue_input.wptr_mc_addr = ((uint64_t)q->wptr_bo->tbo.resource->start << PAGE_SHIFT) + wptr_addr_off;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 814f99888ab1..b94d2c1422ad 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -570,6 +570,15 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
goto reserve_bo_failed;
}
+ if (clear) {
+ r = amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
+ if (r) {
+ pr_debug("failed %d to sync bo\n", r);
+ amdgpu_bo_unreserve(bo);
+ goto reserve_bo_failed;
+ }
+ }
+
r = dma_resv_reserve_fences(bo->tbo.base.resv, 1);
if (r) {
pr_debug("failed %d to reserve bo\n", r);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index bceb1a5b2518..3fdaba56be6f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -801,7 +801,7 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
p2plink->attr.name = "properties";
p2plink->attr.mode = KFD_SYSFS_FILE_MODE;
- sysfs_attr_init(&iolink->attr);
+ sysfs_attr_init(&p2plink->attr);
ret = sysfs_create_file(p2plink->kobj, &p2plink->attr);
if (ret < 0)
return ret;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 86bc23a67d97..9c7b69d377bd 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1184,24 +1184,38 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_
memset(pa_config, 0, sizeof(*pa_config));
- logical_addr_low = min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18;
- pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
-
- if (adev->apu_flags & AMD_APU_IS_RAVEN2)
- /*
- * Raven2 has a HW issue that it is unable to use the vram which
- * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the
- * workaround that increase system aperture high address (add 1)
- * to get rid of the VM fault and hardware hang.
- */
- logical_addr_high = max((adev->gmc.fb_end >> 18) + 0x1, adev->gmc.agp_end >> 18);
- else
- logical_addr_high = max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18;
-
agp_base = 0;
agp_bot = adev->gmc.agp_start >> 24;
agp_top = adev->gmc.agp_end >> 24;
+ /* AGP aperture is disabled */
+ if (agp_bot == agp_top) {
+ logical_addr_low = adev->gmc.vram_start >> 18;
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ /*
+ * Raven2 has a HW issue that it is unable to use the vram which
+ * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the
+ * workaround that increase system aperture high address (add 1)
+ * to get rid of the VM fault and hardware hang.
+ */
+ logical_addr_high = (adev->gmc.fb_end >> 18) + 0x1;
+ else
+ logical_addr_high = adev->gmc.vram_end >> 18;
+ } else {
+ logical_addr_low = min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18;
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ /*
+ * Raven2 has a HW issue that it is unable to use the vram which
+ * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the
+ * workaround that increase system aperture high address (add 1)
+ * to get rid of the VM fault and hardware hang.
+ */
+ logical_addr_high = max((adev->gmc.fb_end >> 18) + 0x1, adev->gmc.agp_end >> 18);
+ else
+ logical_addr_high = max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18;
+ }
+
+ pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
page_table_start.high_part = (u32)(adev->gmc.gart_start >> 44) & 0xF;
page_table_start.low_part = (u32)(adev->gmc.gart_start >> 12);
@@ -1513,6 +1527,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
}
break;
}
+ if (init_data.flags.gpu_vm_support &&
+ (amdgpu_sg_display == 0))
+ init_data.flags.gpu_vm_support = false;
if (init_data.flags.gpu_vm_support)
adev->mode_info.gpu_vm_support = true;
@@ -1730,10 +1747,6 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
adev->dm.vblank_control_workqueue = NULL;
}
- for (i = 0; i < adev->dm.display_indexes_num; i++) {
- drm_encoder_cleanup(&adev->dm.mst_encoders[i].base);
- }
-
amdgpu_dm_destroy_drm_device(&adev->dm);
#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
@@ -4507,6 +4520,17 @@ DEVICE_ATTR_WO(s3_debug);
static int dm_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ struct atom_context *ctx = mode_info->atom_context;
+ int index = GetIndexIntoMasterTable(DATA, Object_Header);
+ u16 data_offset;
+
+ /* if there is no object header, skip DM */
+ if (!amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset)) {
+ adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK;
+ dev_info(adev->dev, "No object header, skipping DM\n");
+ return -ENOENT;
+ }
switch (adev->asic_type) {
#if defined(CONFIG_DRM_AMD_DC_SI)
@@ -5311,8 +5335,6 @@ static void fill_stream_properties_from_drm_display_mode(
timing_out->aspect_ratio = get_aspect_ratio(mode_in);
- stream->output_color_space = get_output_color_space(timing_out);
-
stream->out_transfer_func->type = TF_TYPE_PREDEFINED;
stream->out_transfer_func->tf = TRANSFER_FUNCTION_SRGB;
if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) {
@@ -5323,6 +5345,8 @@ static void fill_stream_properties_from_drm_display_mode(
adjust_colour_depth_from_display_info(timing_out, info);
}
}
+
+ stream->output_color_space = get_output_color_space(timing_out);
}
static void fill_audio_info(struct audio_info *audio_info,
@@ -5835,7 +5859,8 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
*/
DRM_DEBUG_DRIVER("No preferred mode found\n");
} else {
- recalculate_timing = is_freesync_video_mode(&mode, aconnector);
+ recalculate_timing = amdgpu_freesync_vid_mode &&
+ is_freesync_video_mode(&mode, aconnector);
if (recalculate_timing) {
freesync_mode = get_highest_refresh_rate_mode(aconnector, false);
drm_mode_copy(&saved_mode, &mode);
@@ -6986,7 +7011,7 @@ static void amdgpu_dm_connector_add_freesync_modes(struct drm_connector *connect
struct amdgpu_dm_connector *amdgpu_dm_connector =
to_amdgpu_dm_connector(connector);
- if (!edid)
+ if (!(amdgpu_freesync_vid_mode && edid))
return;
if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10)
@@ -8850,7 +8875,8 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
* TODO: Refactor this function to allow this check to work
* in all conditions.
*/
- if (dm_new_crtc_state->stream &&
+ if (amdgpu_freesync_vid_mode &&
+ dm_new_crtc_state->stream &&
is_timing_unchanged_for_freesync(new_crtc_state, old_crtc_state))
goto skip_modeset;
@@ -8885,7 +8911,14 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
if (!dm_old_crtc_state->stream)
goto skip_modeset;
- if (dm_new_crtc_state->stream &&
+ /* Unset freesync video if it was active before */
+ if (dm_old_crtc_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED) {
+ dm_new_crtc_state->freesync_config.state = VRR_STATE_INACTIVE;
+ dm_new_crtc_state->freesync_config.fixed_refresh_in_uhz = 0;
+ }
+
+ /* Now check if we should set freesync video mode */
+ if (amdgpu_freesync_vid_mode && dm_new_crtc_state->stream &&
is_timing_unchanged_for_freesync(new_crtc_state,
old_crtc_state)) {
new_crtc_state->mode_changed = false;
@@ -8897,7 +8930,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
set_freesync_fixed_config(dm_new_crtc_state);
goto skip_modeset;
- } else if (aconnector &&
+ } else if (amdgpu_freesync_vid_mode && aconnector &&
is_freesync_video_mode(&new_crtc_state->mode,
aconnector)) {
struct drm_display_mode *high_mode;
@@ -9501,6 +9534,8 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
bool lock_and_validation_needed = false;
struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state;
#if defined(CONFIG_DRM_AMD_DC_DCN)
+ struct drm_dp_mst_topology_mgr *mgr;
+ struct drm_dp_mst_topology_state *mst_state;
struct dsc_mst_fairness_vars vars[MAX_PIPES];
#endif
@@ -9528,8 +9563,8 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
goto fail;
}
- if (dm_old_con_state->abm_level !=
- dm_new_con_state->abm_level)
+ if (dm_old_con_state->abm_level != dm_new_con_state->abm_level ||
+ dm_old_con_state->scaling != dm_new_con_state->scaling)
new_crtc_state->connectors_changed = true;
}
@@ -9623,7 +9658,11 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
* `dcn10_can_pipe_disable_cursor`). By now, all modified planes are in
* atomic state, so call drm helper to normalize zpos.
*/
- drm_atomic_normalize_zpos(dev, state);
+ ret = drm_atomic_normalize_zpos(dev, state);
+ if (ret) {
+ drm_dbg(dev, "drm_atomic_normalize_zpos() failed\n");
+ goto fail;
+ }
/* Remove exiting planes if they are modified */
for_each_oldnew_plane_in_state_reverse(state, plane, old_plane_state, new_plane_state, i) {
@@ -9749,6 +9788,28 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
lock_and_validation_needed = true;
}
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+ /* set the slot info for each mst_state based on the link encoding format */
+ for_each_new_mst_mgr_in_state(state, mgr, mst_state, i) {
+ struct amdgpu_dm_connector *aconnector;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ u8 link_coding_cap;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ if (connector->index == mst_state->mgr->conn_base_id) {
+ aconnector = to_amdgpu_dm_connector(connector);
+ link_coding_cap = dc_link_dp_mst_decide_link_encoding_format(aconnector->dc_link);
+ drm_dp_mst_update_slots(mst_state, link_coding_cap);
+
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+ }
+#endif
+
/**
* Streams and planes are reset when there are changes that affect
* bandwidth. Anything that affects bandwidth needs to go through
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index 6994c9a1ed85..5cff56bb8f56 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -120,23 +120,50 @@ enum dc_edid_status dm_helpers_parse_edid_caps(
}
static void
-fill_dc_mst_payload_table_from_drm(struct drm_dp_mst_topology_state *mst_state,
- struct amdgpu_dm_connector *aconnector,
+fill_dc_mst_payload_table_from_drm(struct dc_link *link,
+ bool enable,
+ struct drm_dp_mst_atomic_payload *target_payload,
struct dc_dp_mst_stream_allocation_table *table)
{
struct dc_dp_mst_stream_allocation_table new_table = { 0 };
struct dc_dp_mst_stream_allocation *sa;
- struct drm_dp_mst_atomic_payload *payload;
+ struct link_mst_stream_allocation_table copy_of_link_table =
+ link->mst_stream_alloc_table;
+
+ int i;
+ int current_hw_table_stream_cnt = copy_of_link_table.stream_count;
+ struct link_mst_stream_allocation *dc_alloc;
+
+ /* TODO: refactor to set link->mst_stream_alloc_table directly if possible.*/
+ if (enable) {
+ dc_alloc =
+ &copy_of_link_table.stream_allocations[current_hw_table_stream_cnt];
+ dc_alloc->vcp_id = target_payload->vcpi;
+ dc_alloc->slot_count = target_payload->time_slots;
+ } else {
+ for (i = 0; i < copy_of_link_table.stream_count; i++) {
+ dc_alloc =
+ &copy_of_link_table.stream_allocations[i];
+
+ if (dc_alloc->vcp_id == target_payload->vcpi) {
+ dc_alloc->vcp_id = 0;
+ dc_alloc->slot_count = 0;
+ break;
+ }
+ }
+ ASSERT(i != copy_of_link_table.stream_count);
+ }
/* Fill payload info*/
- list_for_each_entry(payload, &mst_state->payloads, next) {
- if (payload->delete)
- continue;
-
- sa = &new_table.stream_allocations[new_table.stream_count];
- sa->slot_count = payload->time_slots;
- sa->vcp_id = payload->vcpi;
- new_table.stream_count++;
+ for (i = 0; i < MAX_CONTROLLER_NUM; i++) {
+ dc_alloc =
+ &copy_of_link_table.stream_allocations[i];
+ if (dc_alloc->vcp_id > 0 && dc_alloc->slot_count > 0) {
+ sa = &new_table.stream_allocations[new_table.stream_count];
+ sa->slot_count = dc_alloc->slot_count;
+ sa->vcp_id = dc_alloc->vcp_id;
+ new_table.stream_count++;
+ }
}
/* Overwrite the old table */
@@ -185,7 +212,7 @@ bool dm_helpers_dp_mst_write_payload_allocation_table(
* AUX message. The sequence is slot 1-63 allocated sequence for each
* stream. AMD ASIC stream slot allocation should follow the same
* sequence. copy DRM MST allocation to dc */
- fill_dc_mst_payload_table_from_drm(mst_state, aconnector, proposed_table);
+ fill_dc_mst_payload_table_from_drm(stream->link, enable, payload, proposed_table);
return true;
}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 1edf7385f8d8..abdbd4352f6f 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -468,7 +468,6 @@ static const struct drm_connector_helper_funcs dm_dp_mst_connector_helper_funcs
static void amdgpu_dm_encoder_destroy(struct drm_encoder *encoder)
{
drm_encoder_cleanup(encoder);
- kfree(encoder);
}
static const struct drm_encoder_funcs amdgpu_dm_encoder_funcs = {
@@ -904,11 +903,6 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
if (IS_ERR(mst_state))
return PTR_ERR(mst_state);
- mst_state->pbn_div = dm_mst_get_pbn_divider(dc_link);
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- drm_dp_mst_update_slots(mst_state, dc_link_dp_mst_decide_link_encoding_format(dc_link));
-#endif
-
/* Set up params */
for (i = 0; i < dc_state->stream_count; i++) {
struct dc_dsc_policy dsc_policy = {0};
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
index 471078fc3900..652270a0b498 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
@@ -90,8 +90,8 @@ static const struct out_csc_color_matrix_type output_csc_matrix[] = {
{ 0xE00, 0xF349, 0xFEB7, 0x1000, 0x6CE, 0x16E3,
0x24F, 0x200, 0xFCCB, 0xF535, 0xE00, 0x1000} },
{ COLOR_SPACE_YCBCR2020_TYPE,
- { 0x1000, 0xF149, 0xFEB7, 0x0000, 0x0868, 0x15B2,
- 0x01E6, 0x0000, 0xFB88, 0xF478, 0x1000, 0x0000} },
+ { 0x1000, 0xF149, 0xFEB7, 0x1004, 0x0868, 0x15B2,
+ 0x01E6, 0x201, 0xFB88, 0xF478, 0x1000, 0x1004} },
{ COLOR_SPACE_YCBCR709_BLACK_TYPE,
{ 0x0000, 0x0000, 0x0000, 0x1000, 0x0000, 0x0000,
0x0000, 0x0200, 0x0000, 0x0000, 0x0000, 0x1000} },
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index 342e906ae26e..c88f044666fe 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -3995,10 +3995,13 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx)
struct fixed31_32 avg_time_slots_per_mtp = dc_fixpt_from_int(0);
int i;
bool mst_mode = (link->type == dc_connection_mst_branch);
+ /* adjust for drm changes*/
+ bool update_drm_mst_state = true;
const struct link_hwss *link_hwss = get_link_hwss(link, &pipe_ctx->link_res);
const struct dc_link_settings empty_link_settings = {0};
DC_LOGGER_INIT(link->ctx->logger);
+
/* deallocate_mst_payload is called before disable link. When mode or
* disable/enable monitor, new stream is created which is not in link
* stream[] yet. For this, payload is not allocated yet, so de-alloc
@@ -4014,7 +4017,7 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx)
&empty_link_settings,
avg_time_slots_per_mtp);
- if (mst_mode) {
+ if (mst_mode || update_drm_mst_state) {
/* when link is in mst mode, reply on mst manager to remove
* payload
*/
@@ -4077,11 +4080,18 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx)
stream->ctx,
stream);
+ if (!update_drm_mst_state)
+ dm_helpers_dp_mst_send_payload_allocation(
+ stream->ctx,
+ stream,
+ false);
+ }
+
+ if (update_drm_mst_state)
dm_helpers_dp_mst_send_payload_allocation(
stream->ctx,
stream,
false);
- }
return DC_OK;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index fe2023f18b7d..8f894c1d1d1e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -3626,7 +3626,7 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx)
(int)hubp->curs_attr.width || pos_cpy.x
<= (int)hubp->curs_attr.width +
pipe_ctx->plane_state->src_rect.x) {
- pos_cpy.x = temp_x + viewport_width;
+ pos_cpy.x = 2 * viewport_width - temp_x;
}
}
} else {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c
index f9ea1e86707f..79850a68f62a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c
@@ -874,8 +874,9 @@ static const struct dc_plane_cap plane_cap = {
},
// 6:1 downscaling ratio: 1000/6 = 166.666
+ // 4:1 downscaling ratio for ARGB888 to prevent underflow during P010 playback: 1000/4 = 250
.max_downscale_factor = {
- .argb8888 = 167,
+ .argb8888 = 250,
.nv12 = 167,
.fp16 = 167
},
@@ -1763,7 +1764,7 @@ static bool dcn314_resource_construct(
pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE;
pool->base.pipe_count = pool->base.res_cap->num_timing_generator;
pool->base.mpcc_count = pool->base.res_cap->num_timing_generator;
- dc->caps.max_downscale_ratio = 600;
+ dc->caps.max_downscale_ratio = 400;
dc->caps.i2c_speed_in_khz = 100;
dc->caps.i2c_speed_in_khz_hdcp = 100;
dc->caps.max_cursor_size = 256;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c
index dc4649458567..a4e9fd5307c6 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c
@@ -94,7 +94,7 @@ static const struct hw_sequencer_funcs dcn32_funcs = {
.get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync,
.calc_vupdate_position = dcn10_calc_vupdate_position,
.apply_idle_power_optimizations = dcn32_apply_idle_power_optimizations,
- .does_plane_fit_in_mall = dcn30_does_plane_fit_in_mall,
+ .does_plane_fit_in_mall = NULL,
.set_backlight_level = dcn21_set_backlight_level,
.set_abm_immediate_disable = dcn21_set_abm_immediate_disable,
.hardware_release = dcn30_hardware_release,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
index 950669f2c10d..cb7c0c878423 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
@@ -3183,7 +3183,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
} else {
v->MIN_DST_Y_NEXT_START[k] = v->VTotal[k] - v->VFrontPorch[k] + v->VTotal[k] - v->VActive[k] - v->VStartup[k];
}
- v->MIN_DST_Y_NEXT_START[k] += dml_floor(4.0 * v->TSetup[k] / (double)v->HTotal[k] / v->PixelClock[k], 1.0) / 4.0;
+ v->MIN_DST_Y_NEXT_START[k] += dml_floor(4.0 * v->TSetup[k] / ((double)v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0;
if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k])
<= (isInterlaceTiming ?
dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) :
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
index 5af601cff1a0..b53feeaf5cf1 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
@@ -6257,12 +6257,12 @@ bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurface
double SwathSizePerSurfaceC[DC__NUM_DPP__MAX];
bool NotEnoughDETSwathFillLatencyHiding = false;
- /* calculate sum of single swath size for all pipes in bytes*/
+ /* calculate sum of single swath size for all pipes in bytes */
for (k = 0; k < NumberOfActiveSurfaces; k++) {
- SwathSizePerSurfaceY[k] += SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k];
+ SwathSizePerSurfaceY[k] = SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k];
if (SwathHeightC[k] != 0)
- SwathSizePerSurfaceC[k] += SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k];
+ SwathSizePerSurfaceC[k] = SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k];
else
SwathSizePerSurfaceC[k] = 0;
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
index 4a122925c3ae..92c18bfb98b3 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
@@ -532,6 +532,9 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub,
if (dmub->hw_funcs.reset)
dmub->hw_funcs.reset(dmub);
+ /* reset the cache of the last wptr as well now that hw is reset */
+ dmub->inbox1_last_wptr = 0;
+
cw0.offset.quad_part = inst_fb->gpu_addr;
cw0.region.base = DMUB_CW0_BASE;
cw0.region.top = cw0.region.base + inst_fb->size - 1;
@@ -649,6 +652,15 @@ enum dmub_status dmub_srv_hw_reset(struct dmub_srv *dmub)
if (dmub->hw_funcs.reset)
dmub->hw_funcs.reset(dmub);
+ /* mailboxes have been reset in hw, so reset the sw state as well */
+ dmub->inbox1_last_wptr = 0;
+ dmub->inbox1_rb.wrpt = 0;
+ dmub->inbox1_rb.rptr = 0;
+ dmub->outbox0_rb.wrpt = 0;
+ dmub->outbox0_rb.rptr = 0;
+ dmub->outbox1_rb.wrpt = 0;
+ dmub->outbox1_rb.rptr = 0;
+
dmub->hw_init = false;
return DMUB_STATUS_OK;
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 236657eece47..2f3e239e623d 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -1991,6 +1991,8 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_
case IP_VERSION(9, 4, 2):
case IP_VERSION(10, 3, 0):
case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
*states = ATTR_STATE_SUPPORTED;
break;
default:
@@ -2007,14 +2009,16 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_
gc_ver == IP_VERSION(10, 3, 0) ||
gc_ver == IP_VERSION(10, 1, 2) ||
gc_ver == IP_VERSION(11, 0, 0) ||
- gc_ver == IP_VERSION(11, 0, 2)))
+ gc_ver == IP_VERSION(11, 0, 2) ||
+ gc_ver == IP_VERSION(11, 0, 3)))
*states = ATTR_STATE_UNSUPPORTED;
} else if (DEVICE_ATTR_IS(pp_dpm_dclk)) {
if (!(gc_ver == IP_VERSION(10, 3, 1) ||
gc_ver == IP_VERSION(10, 3, 0) ||
gc_ver == IP_VERSION(10, 1, 2) ||
gc_ver == IP_VERSION(11, 0, 0) ||
- gc_ver == IP_VERSION(11, 0, 2)))
+ gc_ver == IP_VERSION(11, 0, 2) ||
+ gc_ver == IP_VERSION(11, 0, 3)))
*states = ATTR_STATE_UNSUPPORTED;
} else if (DEVICE_ATTR_IS(pp_power_profile_mode)) {
if (amdgpu_dpm_get_power_profile_mode(adev, NULL) == -EOPNOTSUPP)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index ca3beb5d8f27..6ab155023592 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -1500,6 +1500,20 @@ static int smu_disable_dpms(struct smu_context *smu)
}
/*
+ * For SMU 13.0.4/11, PMFW will handle the features disablement properly
+ * for gpu reset case. Driver involvement is unnecessary.
+ */
+ if (amdgpu_in_reset(adev)) {
+ switch (adev->ip_versions[MP1_HWIP][0]) {
+ case IP_VERSION(13, 0, 4):
+ case IP_VERSION(13, 0, 11):
+ return 0;
+ default:
+ break;
+ }
+ }
+
+ /*
* For gpu reset, runpm and hibernation through BACO,
* BACO feature has to be kept enabled.
*/
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
index d6b964cf73bd..4bc7aee4d44f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
@@ -123,7 +123,8 @@
(1 << FEATURE_DS_FCLK_BIT) | \
(1 << FEATURE_DS_LCLK_BIT) | \
(1 << FEATURE_DS_DCFCLK_BIT) | \
- (1 << FEATURE_DS_UCLK_BIT))
+ (1 << FEATURE_DS_UCLK_BIT) | \
+ (1ULL << FEATURE_DS_VCN_BIT))
//For use with feature control messages
typedef enum {
@@ -522,9 +523,9 @@ typedef enum {
TEMP_HOTSPOT_M,
TEMP_MEM,
TEMP_VR_GFX,
- TEMP_VR_SOC,
TEMP_VR_MEM0,
TEMP_VR_MEM1,
+ TEMP_VR_SOC,
TEMP_VR_U,
TEMP_LIQUID0,
TEMP_LIQUID1,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
index d6b13933a98f..48a3a3952ceb 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
@@ -113,20 +113,21 @@
#define NUM_FEATURES 64
#define ALLOWED_FEATURE_CTRL_DEFAULT 0xFFFFFFFFFFFFFFFFULL
-#define ALLOWED_FEATURE_CTRL_SCPM (1 << FEATURE_DPM_GFXCLK_BIT) | \
- (1 << FEATURE_DPM_GFX_POWER_OPTIMIZER_BIT) | \
- (1 << FEATURE_DPM_UCLK_BIT) | \
- (1 << FEATURE_DPM_FCLK_BIT) | \
- (1 << FEATURE_DPM_SOCCLK_BIT) | \
- (1 << FEATURE_DPM_MP0CLK_BIT) | \
- (1 << FEATURE_DPM_LINK_BIT) | \
- (1 << FEATURE_DPM_DCN_BIT) | \
- (1 << FEATURE_DS_GFXCLK_BIT) | \
- (1 << FEATURE_DS_SOCCLK_BIT) | \
- (1 << FEATURE_DS_FCLK_BIT) | \
- (1 << FEATURE_DS_LCLK_BIT) | \
- (1 << FEATURE_DS_DCFCLK_BIT) | \
- (1 << FEATURE_DS_UCLK_BIT)
+#define ALLOWED_FEATURE_CTRL_SCPM ((1 << FEATURE_DPM_GFXCLK_BIT) | \
+ (1 << FEATURE_DPM_GFX_POWER_OPTIMIZER_BIT) | \
+ (1 << FEATURE_DPM_UCLK_BIT) | \
+ (1 << FEATURE_DPM_FCLK_BIT) | \
+ (1 << FEATURE_DPM_SOCCLK_BIT) | \
+ (1 << FEATURE_DPM_MP0CLK_BIT) | \
+ (1 << FEATURE_DPM_LINK_BIT) | \
+ (1 << FEATURE_DPM_DCN_BIT) | \
+ (1 << FEATURE_DS_GFXCLK_BIT) | \
+ (1 << FEATURE_DS_SOCCLK_BIT) | \
+ (1 << FEATURE_DS_FCLK_BIT) | \
+ (1 << FEATURE_DS_LCLK_BIT) | \
+ (1 << FEATURE_DS_DCFCLK_BIT) | \
+ (1 << FEATURE_DS_UCLK_BIT) | \
+ (1ULL << FEATURE_DS_VCN_BIT))
//For use with feature control messages
typedef enum {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index e8c6febb8b64..992163e66f7b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -28,11 +28,11 @@
#define SMU13_DRIVER_IF_VERSION_INV 0xFFFFFFFF
#define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04
#define SMU13_DRIVER_IF_VERSION_ALDE 0x08
-#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_0 0x34
+#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_0 0x37
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x07
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_10 0x32
-#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x35
+#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x37
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_10 0x1D
#define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
index 85e22210963f..5cdc07165480 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
@@ -1171,6 +1171,7 @@ static int renoir_get_smu_metrics_data(struct smu_context *smu,
int ret = 0;
uint32_t apu_percent = 0;
uint32_t dgpu_percent = 0;
+ struct amdgpu_device *adev = smu->adev;
ret = smu_cmn_get_metrics_table(smu,
@@ -1196,7 +1197,11 @@ static int renoir_get_smu_metrics_data(struct smu_context *smu,
*value = metrics->AverageUvdActivity / 100;
break;
case METRICS_AVERAGE_SOCKETPOWER:
- *value = (metrics->CurrentSocketPower << 8) / 1000;
+ if (((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(12, 0, 1)) && (adev->pm.fw_version >= 0x40000f)) ||
+ ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(12, 0, 0)) && (adev->pm.fw_version >= 0x373200)))
+ *value = metrics->CurrentSocketPower << 8;
+ else
+ *value = (metrics->CurrentSocketPower << 8) / 1000;
break;
case METRICS_TEMPERATURE_EDGE:
*value = (metrics->GfxTemperature / 100) *
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index e54b760b875b..b4373b6568ae 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -1261,7 +1261,8 @@ int smu_v13_0_set_fan_speed_rpm(struct smu_context *smu,
uint32_t speed)
{
struct amdgpu_device *adev = smu->adev;
- uint32_t tach_period, crystal_clock_freq;
+ uint32_t crystal_clock_freq = 2500;
+ uint32_t tach_period;
int ret;
if (!speed)
@@ -1271,7 +1272,6 @@ int smu_v13_0_set_fan_speed_rpm(struct smu_context *smu,
if (ret)
return ret;
- crystal_clock_freq = amdgpu_asic_get_xclk(adev);
tach_period = 60 * crystal_clock_freq * 10000 / (8 * speed);
WREG32_SOC15(THM, 0, regCG_TACH_CTRL,
REG_SET_FIELD(RREG32_SOC15(THM, 0, regCG_TACH_CTRL),
@@ -2298,6 +2298,10 @@ bool smu_v13_0_baco_is_support(struct smu_context *smu)
!smu_baco->platform_support)
return false;
+ /* return true if ASIC is in BACO state already */
+ if (smu_v13_0_baco_get_state(smu) == SMU_BACO_STATE_ENTER)
+ return true;
+
if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_BACO_BIT) &&
!smu_cmn_feature_is_enabled(smu, SMU_FEATURE_BACO_BIT))
return false;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 9643b21c636a..508e392547d7 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -145,6 +145,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] =
MSG_MAP(SetBadMemoryPagesRetiredFlagsPerChannel,
PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel, 0),
MSG_MAP(AllowGpo, PPSMC_MSG_SetGpoAllow, 0),
+ MSG_MAP(AllowIHHostInterrupt, PPSMC_MSG_AllowIHHostInterrupt, 0),
};
static struct cmn2asic_mapping smu_v13_0_0_clk_map[SMU_CLK_COUNT] = {
@@ -213,6 +214,7 @@ static struct cmn2asic_mapping smu_v13_0_0_feature_mask_map[SMU_FEATURE_COUNT] =
FEA_MAP(SOC_PCC),
[SMU_FEATURE_DPM_VCLK_BIT] = {1, FEATURE_MM_DPM_BIT},
[SMU_FEATURE_DPM_DCLK_BIT] = {1, FEATURE_MM_DPM_BIT},
+ [SMU_FEATURE_PPT_BIT] = {1, FEATURE_THROTTLERS_BIT},
};
static struct cmn2asic_mapping smu_v13_0_0_table_map[SMU_TABLE_COUNT] = {
@@ -405,6 +407,9 @@ static int smu_v13_0_0_setup_pptable(struct smu_context *smu)
struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (amdgpu_sriov_vf(smu->adev))
+ return 0;
+
ret = smu_v13_0_0_get_pptable_from_pmfw(smu,
&smu_table->power_play_table,
&smu_table->power_play_table_size);
@@ -1255,6 +1260,9 @@ static int smu_v13_0_0_get_thermal_temperature_range(struct smu_context *smu,
table_context->power_play_table;
PPTable_t *pptable = smu->smu_table.driver_pptable;
+ if (amdgpu_sriov_vf(smu->adev))
+ return 0;
+
if (!range)
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index 5c6c6ad011ca..9e1967d8049e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -124,6 +124,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] =
MSG_MAP(DFCstateControl, PPSMC_MSG_SetExternalClientDfCstateAllow, 0),
MSG_MAP(ArmD3, PPSMC_MSG_ArmD3, 0),
MSG_MAP(AllowGpo, PPSMC_MSG_SetGpoAllow, 0),
+ MSG_MAP(GetPptLimit, PPSMC_MSG_GetPptLimit, 0),
};
static struct cmn2asic_mapping smu_v13_0_7_clk_map[SMU_CLK_COUNT] = {
@@ -192,6 +193,7 @@ static struct cmn2asic_mapping smu_v13_0_7_feature_mask_map[SMU_FEATURE_COUNT] =
FEA_MAP(SOC_PCC),
[SMU_FEATURE_DPM_VCLK_BIT] = {1, FEATURE_MM_DPM_BIT},
[SMU_FEATURE_DPM_DCLK_BIT] = {1, FEATURE_MM_DPM_BIT},
+ [SMU_FEATURE_PPT_BIT] = {1, FEATURE_THROTTLERS_BIT},
};
static struct cmn2asic_mapping smu_v13_0_7_table_map[SMU_TABLE_COUNT] = {
diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c
index c7443317c747..66a4a41c3fe9 100644
--- a/drivers/gpu/drm/ast/ast_mode.c
+++ b/drivers/gpu/drm/ast/ast_mode.c
@@ -714,7 +714,7 @@ static int ast_primary_plane_init(struct ast_private *ast)
struct ast_plane *ast_primary_plane = &ast->primary_plane;
struct drm_plane *primary_plane = &ast_primary_plane->base;
void __iomem *vaddr = ast->vram;
- u64 offset = ast->vram_base;
+ u64 offset = 0; /* with shmem, the primary plane is always at offset 0 */
unsigned long cursor_size = roundup(AST_HWC_SIZE + AST_HWC_SIGNATURE_SIZE, PAGE_SIZE);
unsigned long size = ast->vram_fb_available - cursor_size;
int ret;
@@ -972,7 +972,7 @@ static int ast_cursor_plane_init(struct ast_private *ast)
return -ENOMEM;
vaddr = ast->vram + ast->vram_fb_available - size;
- offset = ast->vram_base + ast->vram_fb_available - size;
+ offset = ast->vram_fb_available - size;
ret = ast_plane_init(dev, ast_cursor_plane, vaddr, offset, size,
0x01, &ast_cursor_plane_funcs,
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c
index a2f0860b20bb..d751820c6da6 100644
--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c
+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c
@@ -193,6 +193,7 @@ static int snd_dw_hdmi_probe(struct platform_device *pdev)
struct hdmi_codec_pdata pdata;
struct platform_device *platform;
+ memset(&pdata, 0, sizeof(pdata));
pdata.ops = &dw_hdmi_i2s_ops;
pdata.i2s = 1;
pdata.max_i2s_channels = 8;
diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c
index 51a46689cda7..4ca37261584a 100644
--- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
@@ -3372,6 +3372,9 @@ void drm_dp_remove_payload(struct drm_dp_mst_topology_mgr *mgr,
mgr->payload_count--;
mgr->next_start_slot -= payload->time_slots;
+
+ if (payload->delete)
+ drm_dp_mst_put_port_malloc(payload->port);
}
EXPORT_SYMBOL(drm_dp_remove_payload);
@@ -4327,7 +4330,6 @@ int drm_dp_atomic_release_time_slots(struct drm_atomic_state *state,
drm_dbg_atomic(mgr->dev, "[MST PORT:%p] TU %d -> 0\n", port, payload->time_slots);
if (!payload->delete) {
- drm_dp_mst_put_port_malloc(port);
payload->pbn = 0;
payload->delete = true;
topology_state->payload_mask &= ~BIT(payload->vcpi - 1);
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 11bb59399471..3d1f50f481cf 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -38,6 +38,25 @@ static void drm_block_free(struct drm_buddy *mm,
kmem_cache_free(slab_blocks, block);
}
+static void list_insert_sorted(struct drm_buddy *mm,
+ struct drm_buddy_block *block)
+{
+ struct drm_buddy_block *node;
+ struct list_head *head;
+
+ head = &mm->free_list[drm_buddy_block_order(block)];
+ if (list_empty(head)) {
+ list_add(&block->link, head);
+ return;
+ }
+
+ list_for_each_entry(node, head, link)
+ if (drm_buddy_block_offset(block) < drm_buddy_block_offset(node))
+ break;
+
+ __list_add(&block->link, node->link.prev, &node->link);
+}
+
static void mark_allocated(struct drm_buddy_block *block)
{
block->header &= ~DRM_BUDDY_HEADER_STATE;
@@ -52,8 +71,7 @@ static void mark_free(struct drm_buddy *mm,
block->header &= ~DRM_BUDDY_HEADER_STATE;
block->header |= DRM_BUDDY_FREE;
- list_add(&block->link,
- &mm->free_list[drm_buddy_block_order(block)]);
+ list_insert_sorted(mm, block);
}
static void mark_split(struct drm_buddy_block *block)
@@ -387,20 +405,26 @@ err_undo:
}
static struct drm_buddy_block *
-get_maxblock(struct list_head *head)
+get_maxblock(struct drm_buddy *mm, unsigned int order)
{
struct drm_buddy_block *max_block = NULL, *node;
+ unsigned int i;
- max_block = list_first_entry_or_null(head,
- struct drm_buddy_block,
- link);
- if (!max_block)
- return NULL;
+ for (i = order; i <= mm->max_order; ++i) {
+ if (!list_empty(&mm->free_list[i])) {
+ node = list_last_entry(&mm->free_list[i],
+ struct drm_buddy_block,
+ link);
+ if (!max_block) {
+ max_block = node;
+ continue;
+ }
- list_for_each_entry(node, head, link) {
- if (drm_buddy_block_offset(node) >
- drm_buddy_block_offset(max_block))
- max_block = node;
+ if (drm_buddy_block_offset(node) >
+ drm_buddy_block_offset(max_block)) {
+ max_block = node;
+ }
+ }
}
return max_block;
@@ -412,20 +436,23 @@ alloc_from_freelist(struct drm_buddy *mm,
unsigned long flags)
{
struct drm_buddy_block *block = NULL;
- unsigned int i;
+ unsigned int tmp;
int err;
- for (i = order; i <= mm->max_order; ++i) {
- if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
- block = get_maxblock(&mm->free_list[i]);
- if (block)
- break;
- } else {
- block = list_first_entry_or_null(&mm->free_list[i],
- struct drm_buddy_block,
- link);
- if (block)
- break;
+ if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
+ block = get_maxblock(mm, order);
+ if (block)
+ /* Store the obtained block order */
+ tmp = drm_buddy_block_order(block);
+ } else {
+ for (tmp = order; tmp <= mm->max_order; ++tmp) {
+ if (!list_empty(&mm->free_list[tmp])) {
+ block = list_last_entry(&mm->free_list[tmp],
+ struct drm_buddy_block,
+ link);
+ if (block)
+ break;
+ }
}
}
@@ -434,18 +461,18 @@ alloc_from_freelist(struct drm_buddy *mm,
BUG_ON(!drm_buddy_block_is_free(block));
- while (i != order) {
+ while (tmp != order) {
err = split_block(mm, block);
if (unlikely(err))
goto err_undo;
block = block->right;
- i--;
+ tmp--;
}
return block;
err_undo:
- if (i != order)
+ if (tmp != order)
__drm_buddy_free(mm, block);
return ERR_PTR(err);
}
diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c
index fd67efe37c63..056ab9d5f313 100644
--- a/drivers/gpu/drm/drm_client.c
+++ b/drivers/gpu/drm/drm_client.c
@@ -233,21 +233,17 @@ void drm_client_dev_restore(struct drm_device *dev)
static void drm_client_buffer_delete(struct drm_client_buffer *buffer)
{
- struct drm_device *dev = buffer->client->dev;
-
if (buffer->gem) {
drm_gem_vunmap_unlocked(buffer->gem, &buffer->map);
drm_gem_object_put(buffer->gem);
}
- if (buffer->handle)
- drm_mode_destroy_dumb(dev, buffer->handle, buffer->client->file);
-
kfree(buffer);
}
static struct drm_client_buffer *
-drm_client_buffer_create(struct drm_client_dev *client, u32 width, u32 height, u32 format)
+drm_client_buffer_create(struct drm_client_dev *client, u32 width, u32 height,
+ u32 format, u32 *handle)
{
const struct drm_format_info *info = drm_format_info(format);
struct drm_mode_create_dumb dumb_args = { };
@@ -269,16 +265,15 @@ drm_client_buffer_create(struct drm_client_dev *client, u32 width, u32 height, u
if (ret)
goto err_delete;
- buffer->handle = dumb_args.handle;
- buffer->pitch = dumb_args.pitch;
-
obj = drm_gem_object_lookup(client->file, dumb_args.handle);
if (!obj) {
ret = -ENOENT;
goto err_delete;
}
+ buffer->pitch = dumb_args.pitch;
buffer->gem = obj;
+ *handle = dumb_args.handle;
return buffer;
@@ -365,7 +360,8 @@ static void drm_client_buffer_rmfb(struct drm_client_buffer *buffer)
}
static int drm_client_buffer_addfb(struct drm_client_buffer *buffer,
- u32 width, u32 height, u32 format)
+ u32 width, u32 height, u32 format,
+ u32 handle)
{
struct drm_client_dev *client = buffer->client;
struct drm_mode_fb_cmd fb_req = { };
@@ -377,7 +373,7 @@ static int drm_client_buffer_addfb(struct drm_client_buffer *buffer,
fb_req.depth = info->depth;
fb_req.width = width;
fb_req.height = height;
- fb_req.handle = buffer->handle;
+ fb_req.handle = handle;
fb_req.pitch = buffer->pitch;
ret = drm_mode_addfb(client->dev, &fb_req, client->file);
@@ -414,13 +410,24 @@ struct drm_client_buffer *
drm_client_framebuffer_create(struct drm_client_dev *client, u32 width, u32 height, u32 format)
{
struct drm_client_buffer *buffer;
+ u32 handle;
int ret;
- buffer = drm_client_buffer_create(client, width, height, format);
+ buffer = drm_client_buffer_create(client, width, height, format,
+ &handle);
if (IS_ERR(buffer))
return buffer;
- ret = drm_client_buffer_addfb(buffer, width, height, format);
+ ret = drm_client_buffer_addfb(buffer, width, height, format, handle);
+
+ /*
+ * The handle is only needed for creating the framebuffer, destroy it
+ * again to solve a circular dependency should anybody export the GEM
+ * object as DMA-buf. The framebuffer and our buffer structure are still
+ * holding references to the GEM object to prevent its destruction.
+ */
+ drm_mode_destroy_dumb(client->dev, handle, client->file);
+
if (ret) {
drm_client_buffer_delete(buffer);
return ERR_PTR(ret);
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index b3a731b9170a..0d0c26ebab90 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -30,7 +30,9 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/console.h>
+#include <linux/pci.h>
#include <linux/sysrq.h>
+#include <linux/vga_switcheroo.h>
#include <drm/drm_atomic.h>
#include <drm/drm_drv.h>
@@ -1909,6 +1911,11 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper,
return ret;
strcpy(fb_helper->fb->comm, "[fbcon]");
+
+ /* Set the fb info for vgaswitcheroo clients. Does nothing otherwise. */
+ if (dev_is_pci(dev->dev))
+ vga_switcheroo_client_fb_set(to_pci_dev(dev->dev), fb_helper->info);
+
return 0;
}
diff --git a/drivers/gpu/drm/drm_fbdev_generic.c b/drivers/gpu/drm/drm_fbdev_generic.c
index ab8695669279..593aa3283792 100644
--- a/drivers/gpu/drm/drm_fbdev_generic.c
+++ b/drivers/gpu/drm/drm_fbdev_generic.c
@@ -171,11 +171,6 @@ static const struct fb_ops drm_fbdev_fb_ops = {
.fb_imageblit = drm_fbdev_fb_imageblit,
};
-static struct fb_deferred_io drm_fbdev_defio = {
- .delay = HZ / 20,
- .deferred_io = drm_fb_helper_deferred_io,
-};
-
/*
* This function uses the client API to create a framebuffer backed by a dumb buffer.
*/
@@ -222,8 +217,14 @@ static int drm_fbdev_fb_probe(struct drm_fb_helper *fb_helper,
return -ENOMEM;
fbi->flags |= FBINFO_VIRTFB | FBINFO_READS_FAST;
- fbi->fbdefio = &drm_fbdev_defio;
- fb_deferred_io_init(fbi);
+ /* Set a default deferred I/O handler */
+ fb_helper->fbdefio.delay = HZ / 20;
+ fb_helper->fbdefio.deferred_io = drm_fb_helper_deferred_io;
+
+ fbi->fbdefio = &fb_helper->fbdefio;
+ ret = fb_deferred_io_init(fbi);
+ if (ret)
+ return ret;
} else {
/* buffer is mapped for HW framebuffer */
ret = drm_client_buffer_vmap(fb_helper->buffer, &map);
diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c
index 52d8800a8ab8..3659f0465a72 100644
--- a/drivers/gpu/drm/drm_panel_orientation_quirks.c
+++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c
@@ -304,6 +304,12 @@ static const struct dmi_system_id orientation_data[] = {
DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad D330-10IGM"),
},
.driver_data = (void *)&lcd1200x1920_rightside_up,
+ }, { /* Lenovo Ideapad D330-10IGL (HD) */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad D330-10IGL"),
+ },
+ .driver_data = (void *)&lcd800x1280_rightside_up,
}, { /* Lenovo Yoga Book X90F / X91F / X91L */
.matches = {
/* Non exact match to match all versions */
diff --git a/drivers/gpu/drm/drm_vma_manager.c b/drivers/gpu/drm/drm_vma_manager.c
index 7de37f8c68fd..83229a031af0 100644
--- a/drivers/gpu/drm/drm_vma_manager.c
+++ b/drivers/gpu/drm/drm_vma_manager.c
@@ -240,27 +240,8 @@ void drm_vma_offset_remove(struct drm_vma_offset_manager *mgr,
}
EXPORT_SYMBOL(drm_vma_offset_remove);
-/**
- * drm_vma_node_allow - Add open-file to list of allowed users
- * @node: Node to modify
- * @tag: Tag of file to remove
- *
- * Add @tag to the list of allowed open-files for this node. If @tag is
- * already on this list, the ref-count is incremented.
- *
- * The list of allowed-users is preserved across drm_vma_offset_add() and
- * drm_vma_offset_remove() calls. You may even call it if the node is currently
- * not added to any offset-manager.
- *
- * You must remove all open-files the same number of times as you added them
- * before destroying the node. Otherwise, you will leak memory.
- *
- * This is locked against concurrent access internally.
- *
- * RETURNS:
- * 0 on success, negative error code on internal failure (out-of-mem)
- */
-int drm_vma_node_allow(struct drm_vma_offset_node *node, struct drm_file *tag)
+static int vma_node_allow(struct drm_vma_offset_node *node,
+ struct drm_file *tag, bool ref_counted)
{
struct rb_node **iter;
struct rb_node *parent = NULL;
@@ -282,7 +263,8 @@ int drm_vma_node_allow(struct drm_vma_offset_node *node, struct drm_file *tag)
entry = rb_entry(*iter, struct drm_vma_offset_file, vm_rb);
if (tag == entry->vm_tag) {
- entry->vm_count++;
+ if (ref_counted)
+ entry->vm_count++;
goto unlock;
} else if (tag > entry->vm_tag) {
iter = &(*iter)->rb_right;
@@ -307,9 +289,59 @@ unlock:
kfree(new);
return ret;
}
+
+/**
+ * drm_vma_node_allow - Add open-file to list of allowed users
+ * @node: Node to modify
+ * @tag: Tag of file to remove
+ *
+ * Add @tag to the list of allowed open-files for this node. If @tag is
+ * already on this list, the ref-count is incremented.
+ *
+ * The list of allowed-users is preserved across drm_vma_offset_add() and
+ * drm_vma_offset_remove() calls. You may even call it if the node is currently
+ * not added to any offset-manager.
+ *
+ * You must remove all open-files the same number of times as you added them
+ * before destroying the node. Otherwise, you will leak memory.
+ *
+ * This is locked against concurrent access internally.
+ *
+ * RETURNS:
+ * 0 on success, negative error code on internal failure (out-of-mem)
+ */
+int drm_vma_node_allow(struct drm_vma_offset_node *node, struct drm_file *tag)
+{
+ return vma_node_allow(node, tag, true);
+}
EXPORT_SYMBOL(drm_vma_node_allow);
/**
+ * drm_vma_node_allow_once - Add open-file to list of allowed users
+ * @node: Node to modify
+ * @tag: Tag of file to remove
+ *
+ * Add @tag to the list of allowed open-files for this node.
+ *
+ * The list of allowed-users is preserved across drm_vma_offset_add() and
+ * drm_vma_offset_remove() calls. You may even call it if the node is currently
+ * not added to any offset-manager.
+ *
+ * This is not ref-counted unlike drm_vma_node_allow() hence drm_vma_node_revoke()
+ * should only be called once after this.
+ *
+ * This is locked against concurrent access internally.
+ *
+ * RETURNS:
+ * 0 on success, negative error code on internal failure (out-of-mem)
+ */
+int drm_vma_node_allow_once(struct drm_vma_offset_node *node, struct drm_file *tag)
+{
+ return vma_node_allow(node, tag, false);
+}
+EXPORT_SYMBOL(drm_vma_node_allow_once);
+
+/**
* drm_vma_node_revoke - Remove open-file from list of allowed users
* @node: Node to modify
* @tag: Tag of file to remove
diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c
index 572a4e3769f3..a491e6c38875 100644
--- a/drivers/gpu/drm/i915/display/intel_bios.c
+++ b/drivers/gpu/drm/i915/display/intel_bios.c
@@ -2466,6 +2466,22 @@ static enum port dvo_port_to_port(struct drm_i915_private *i915,
dvo_port);
}
+static enum port
+dsi_dvo_port_to_port(struct drm_i915_private *i915, u8 dvo_port)
+{
+ switch (dvo_port) {
+ case DVO_PORT_MIPIA:
+ return PORT_A;
+ case DVO_PORT_MIPIC:
+ if (DISPLAY_VER(i915) >= 11)
+ return PORT_B;
+ else
+ return PORT_C;
+ default:
+ return PORT_NONE;
+ }
+}
+
static int parse_bdb_230_dp_max_link_rate(const int vbt_max_link_rate)
{
switch (vbt_max_link_rate) {
@@ -3414,19 +3430,16 @@ bool intel_bios_is_dsi_present(struct drm_i915_private *i915,
dvo_port = child->dvo_port;
- if (dvo_port == DVO_PORT_MIPIA ||
- (dvo_port == DVO_PORT_MIPIB && DISPLAY_VER(i915) >= 11) ||
- (dvo_port == DVO_PORT_MIPIC && DISPLAY_VER(i915) < 11)) {
- if (port)
- *port = dvo_port - DVO_PORT_MIPIA;
- return true;
- } else if (dvo_port == DVO_PORT_MIPIB ||
- dvo_port == DVO_PORT_MIPIC ||
- dvo_port == DVO_PORT_MIPID) {
+ if (dsi_dvo_port_to_port(i915, dvo_port) == PORT_NONE) {
drm_dbg_kms(&i915->drm,
"VBT has unsupported DSI port %c\n",
port_name(dvo_port - DVO_PORT_MIPIA));
+ continue;
}
+
+ if (port)
+ *port = dsi_dvo_port_to_port(i915, dvo_port);
+ return true;
}
return false;
@@ -3511,7 +3524,7 @@ bool intel_bios_get_dsc_params(struct intel_encoder *encoder,
if (!(child->device_type & DEVICE_TYPE_MIPI_OUTPUT))
continue;
- if (child->dvo_port - DVO_PORT_MIPIA == encoder->port) {
+ if (dsi_dvo_port_to_port(i915, child->dvo_port) == encoder->port) {
if (!devdata->dsc)
return false;
diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
index b74e36d76013..407a477939e5 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -1319,7 +1319,7 @@ static const struct intel_cdclk_vals adlp_cdclk_table[] = {
{ .refclk = 24000, .cdclk = 192000, .divider = 2, .ratio = 16 },
{ .refclk = 24000, .cdclk = 312000, .divider = 2, .ratio = 26 },
{ .refclk = 24000, .cdclk = 552000, .divider = 2, .ratio = 46 },
- { .refclk = 24400, .cdclk = 648000, .divider = 2, .ratio = 54 },
+ { .refclk = 24000, .cdclk = 648000, .divider = 2, .ratio = 54 },
{ .refclk = 38400, .cdclk = 179200, .divider = 3, .ratio = 14 },
{ .refclk = 38400, .cdclk = 192000, .divider = 2, .ratio = 10 },
diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c
index 5575d7abdc09..f76c06b7f1d4 100644
--- a/drivers/gpu/drm/i915/display/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
@@ -328,8 +328,20 @@ out_unlock:
return ret;
}
+static int intelfb_dirty(struct drm_fb_helper *helper, struct drm_clip_rect *clip)
+{
+ if (!(clip->x1 < clip->x2 && clip->y1 < clip->y2))
+ return 0;
+
+ if (helper->fb->funcs->dirty)
+ return helper->fb->funcs->dirty(helper->fb, NULL, 0, 0, clip, 1);
+
+ return 0;
+}
+
static const struct drm_fb_helper_funcs intel_fb_helper_funcs = {
.fb_probe = intelfb_create,
+ .fb_dirty = intelfb_dirty,
};
static void intel_fbdev_destroy(struct intel_fbdev *ifbdev)
diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c
index 76490cc59d8f..7d07fa3123ec 100644
--- a/drivers/gpu/drm/i915/display/skl_universal_plane.c
+++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c
@@ -1627,7 +1627,7 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state)
u32 offset;
int ret;
- if (w > max_width || w < min_width || h > max_height) {
+ if (w > max_width || w < min_width || h > max_height || h < 1) {
drm_dbg_kms(&dev_priv->drm,
"requested Y/RGB source size %dx%d outside limits (min: %dx1 max: %dx%d)\n",
w, h, min_width, max_width, max_height);
diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c
index e0766d1be966..11554645e6ee 100644
--- a/drivers/gpu/drm/i915/display/skl_watermark.c
+++ b/drivers/gpu/drm/i915/display/skl_watermark.c
@@ -1587,7 +1587,8 @@ skl_crtc_allocate_plane_ddb(struct intel_atomic_state *state,
skl_check_wm_level(&wm->wm[level], ddb);
if (icl_need_wm1_wa(i915, plane_id) &&
- level == 1 && wm->wm[0].enable) {
+ level == 1 && !wm->wm[level].enable &&
+ wm->wm[0].enable) {
wm->wm[level].blocks = wm->wm[0].blocks;
wm->wm[level].lines = wm->wm[0].lines;
wm->wm[level].ignore_lines = wm->wm[0].ignore_lines;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 7f2831efc798..e4b78ab4773b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1688,6 +1688,10 @@ void i915_gem_init__contexts(struct drm_i915_private *i915)
init_contexts(&i915->gem.contexts);
}
+/*
+ * Note that this implicitly consumes the ctx reference, by placing
+ * the ctx in the context_xa.
+ */
static void gem_context_register(struct i915_gem_context *ctx,
struct drm_i915_file_private *fpriv,
u32 id)
@@ -1703,10 +1707,6 @@ static void gem_context_register(struct i915_gem_context *ctx,
snprintf(ctx->name, sizeof(ctx->name), "%s[%d]",
current->comm, pid_nr(ctx->pid));
- /* And finally expose ourselves to userspace via the idr */
- old = xa_store(&fpriv->context_xa, id, ctx, GFP_KERNEL);
- WARN_ON(old);
-
spin_lock(&ctx->client->ctx_lock);
list_add_tail_rcu(&ctx->client_link, &ctx->client->ctx_list);
spin_unlock(&ctx->client->ctx_lock);
@@ -1714,6 +1714,10 @@ static void gem_context_register(struct i915_gem_context *ctx,
spin_lock(&i915->gem.contexts.lock);
list_add_tail(&ctx->link, &i915->gem.contexts.list);
spin_unlock(&i915->gem.contexts.lock);
+
+ /* And finally expose ourselves to userspace via the idr */
+ old = xa_store(&fpriv->context_xa, id, ctx, GFP_KERNEL);
+ WARN_ON(old);
}
int i915_gem_context_open(struct drm_i915_private *i915,
@@ -1857,11 +1861,19 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv,
vm = ctx->vm;
GEM_BUG_ON(!vm);
+ /*
+ * Get a reference for the allocated handle. Once the handle is
+ * visible in the vm_xa table, userspace could try to close it
+ * from under our feet, so we need to hold the extra reference
+ * first.
+ */
+ i915_vm_get(vm);
+
err = xa_alloc(&file_priv->vm_xa, &id, vm, xa_limit_32b, GFP_KERNEL);
- if (err)
+ if (err) {
+ i915_vm_put(vm);
return err;
-
- i915_vm_get(vm);
+ }
GEM_BUG_ON(id == 0); /* reserved for invalid/unassigned ppgtt */
args->value = id;
@@ -2199,14 +2211,22 @@ finalize_create_context_locked(struct drm_i915_file_private *file_priv,
if (IS_ERR(ctx))
return ctx;
+ /*
+ * One for the xarray and one for the caller. We need to grab
+ * the reference *prior* to making the ctx visble to userspace
+ * in gem_context_register(), as at any point after that
+ * userspace can try to race us with another thread destroying
+ * the context under our feet.
+ */
+ i915_gem_context_get(ctx);
+
gem_context_register(ctx, file_priv, id);
old = xa_erase(&file_priv->proto_context_xa, id);
GEM_BUG_ON(old != pc);
proto_context_close(file_priv->dev_priv, pc);
- /* One for the xarray and one for the caller */
- return i915_gem_context_get(ctx);
+ return ctx;
}
struct i915_gem_context *
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index f266b68cf012..0f2e056c02dd 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -3483,6 +3483,13 @@ err_request:
eb.composite_fence :
&eb.requests[0]->fence);
+ if (unlikely(eb.gem_context->syncobj)) {
+ drm_syncobj_replace_fence(eb.gem_context->syncobj,
+ eb.composite_fence ?
+ eb.composite_fence :
+ &eb.requests[0]->fence);
+ }
+
if (out_fence) {
if (err == 0) {
fd_install(out_fence_fd, out_fence->file);
@@ -3494,13 +3501,6 @@ err_request:
}
}
- if (unlikely(eb.gem_context->syncobj)) {
- drm_syncobj_replace_fence(eb.gem_context->syncobj,
- eb.composite_fence ?
- eb.composite_fence :
- &eb.requests[0]->fence);
- }
-
if (!out_fence && eb.composite_fence)
dma_fence_put(eb.composite_fence);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 0ad44f3868de..c7c252d4d366 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -697,7 +697,7 @@ insert:
GEM_BUG_ON(lookup_mmo(obj, mmap_type) != mmo);
out:
if (file)
- drm_vma_node_allow(&mmo->vma_node, file);
+ drm_vma_node_allow_once(&mmo->vma_node, file);
return mmo;
err:
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 9c759df700ca..937728840428 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -579,7 +579,7 @@ static int shmem_object_init(struct intel_memory_region *mem,
mapping_set_gfp_mask(mapping, mask);
GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
- i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class, 0);
+ i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class, flags);
obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE;
obj->write_domain = I915_GEM_DOMAIN_CPU;
obj->read_domains = I915_GEM_DOMAIN_CPU;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
index fd42b89b7162..bc21b1c2350a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
@@ -305,10 +305,6 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
spin_unlock(&obj->vma.lock);
obj->tiling_and_stride = tiling | stride;
- i915_gem_object_unlock(obj);
-
- /* Force the fence to be reacquired for GTT access */
- i915_gem_object_release_mmap_gtt(obj);
/* Try to preallocate memory required to save swizzling on put-pages */
if (i915_gem_object_needs_bit17_swizzle(obj)) {
@@ -321,6 +317,11 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
obj->bit_17 = NULL;
}
+ i915_gem_object_unlock(obj);
+
+ /* Force the fence to be reacquired for GTT access */
+ i915_gem_object_release_mmap_gtt(obj);
+
return 0;
}
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index beaf27e09e8a..977dead10ab5 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -1847,7 +1847,7 @@ static int igt_shrink_thp(void *arg)
I915_SHRINK_ACTIVE);
i915_vma_unpin(vma);
if (err)
- goto out_put;
+ goto out_wf;
/*
* Now that the pages are *unpinned* shrinking should invoke
@@ -1863,19 +1863,19 @@ static int igt_shrink_thp(void *arg)
pr_err("unexpected pages mismatch, should_swap=%s\n",
str_yes_no(should_swap));
err = -EINVAL;
- goto out_put;
+ goto out_wf;
}
if (should_swap == (obj->mm.page_sizes.sg || obj->mm.page_sizes.phys)) {
pr_err("unexpected residual page-size bits, should_swap=%s\n",
str_yes_no(should_swap));
err = -EINVAL;
- goto out_put;
+ goto out_wf;
}
err = i915_vma_pin(vma, 0, 0, flags);
if (err)
- goto out_put;
+ goto out_wf;
while (n--) {
err = cpu_check(obj, n, 0xdeadbeaf);
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index e94365b08f1e..2aa63ec521b8 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -528,7 +528,7 @@ retry:
return rq;
}
-struct i915_request *intel_context_find_active_request(struct intel_context *ce)
+struct i915_request *intel_context_get_active_request(struct intel_context *ce)
{
struct intel_context *parent = intel_context_to_parent(ce);
struct i915_request *rq, *active = NULL;
@@ -552,6 +552,8 @@ struct i915_request *intel_context_find_active_request(struct intel_context *ce)
active = rq;
}
+ if (active)
+ active = i915_request_get_rcu(active);
spin_unlock_irqrestore(&parent->guc_state.lock, flags);
return active;
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index fb62b7b8cbcd..0a8d553da3f4 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -268,8 +268,7 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
struct i915_request *intel_context_create_request(struct intel_context *ce);
-struct i915_request *
-intel_context_find_active_request(struct intel_context *ce);
+struct i915_request *intel_context_get_active_request(struct intel_context *ce);
static inline bool intel_context_is_barrier(const struct intel_context *ce)
{
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index cbc8b857d5f7..7a4504ea35c3 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -248,8 +248,8 @@ void intel_engine_dump_active_requests(struct list_head *requests,
ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine,
ktime_t *now);
-struct i915_request *
-intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine);
+void intel_engine_get_hung_entity(struct intel_engine_cs *engine,
+ struct intel_context **ce, struct i915_request **rq);
u32 intel_engine_context_size(struct intel_gt *gt, u8 class);
struct intel_context *
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index c33e0d72d670..d37931e16fd9 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -2094,17 +2094,6 @@ static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
}
}
-static unsigned long list_count(struct list_head *list)
-{
- struct list_head *pos;
- unsigned long count = 0;
-
- list_for_each(pos, list)
- count++;
-
- return count;
-}
-
static unsigned long read_ul(void *p, size_t x)
{
return *(unsigned long *)(p + x);
@@ -2196,11 +2185,11 @@ void intel_engine_dump_active_requests(struct list_head *requests,
}
}
-static void engine_dump_active_requests(struct intel_engine_cs *engine, struct drm_printer *m)
+static void engine_dump_active_requests(struct intel_engine_cs *engine,
+ struct drm_printer *m)
{
+ struct intel_context *hung_ce = NULL;
struct i915_request *hung_rq = NULL;
- struct intel_context *ce;
- bool guc;
/*
* No need for an engine->irq_seqno_barrier() before the seqno reads.
@@ -2209,27 +2198,22 @@ static void engine_dump_active_requests(struct intel_engine_cs *engine, struct d
* But the intention here is just to report an instantaneous snapshot
* so that's fine.
*/
- lockdep_assert_held(&engine->sched_engine->lock);
+ intel_engine_get_hung_entity(engine, &hung_ce, &hung_rq);
drm_printf(m, "\tRequests:\n");
- guc = intel_uc_uses_guc_submission(&engine->gt->uc);
- if (guc) {
- ce = intel_engine_get_hung_context(engine);
- if (ce)
- hung_rq = intel_context_find_active_request(ce);
- } else {
- hung_rq = intel_engine_execlist_find_hung_request(engine);
- }
-
if (hung_rq)
engine_dump_request(hung_rq, m, "\t\thung");
+ else if (hung_ce)
+ drm_printf(m, "\t\tGot hung ce but no hung rq!\n");
- if (guc)
+ if (intel_uc_uses_guc_submission(&engine->gt->uc))
intel_guc_dump_active_requests(engine, hung_rq, m);
else
- intel_engine_dump_active_requests(&engine->sched_engine->requests,
- hung_rq, m);
+ intel_execlists_dump_active_requests(engine, hung_rq, m);
+
+ if (hung_rq)
+ i915_request_put(hung_rq);
}
void intel_engine_dump(struct intel_engine_cs *engine,
@@ -2239,7 +2223,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
struct i915_gpu_error * const error = &engine->i915->gpu_error;
struct i915_request *rq;
intel_wakeref_t wakeref;
- unsigned long flags;
ktime_t dummy;
if (header) {
@@ -2276,13 +2259,8 @@ void intel_engine_dump(struct intel_engine_cs *engine,
i915_reset_count(error));
print_properties(engine, m);
- spin_lock_irqsave(&engine->sched_engine->lock, flags);
engine_dump_active_requests(engine, m);
- drm_printf(m, "\tOn hold?: %lu\n",
- list_count(&engine->sched_engine->hold));
- spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
-
drm_printf(m, "\tMMIO base: 0x%08x\n", engine->mmio_base);
wakeref = intel_runtime_pm_get_if_in_use(engine->uncore->rpm);
if (wakeref) {
@@ -2328,8 +2306,7 @@ intel_engine_create_virtual(struct intel_engine_cs **siblings,
return siblings[0]->cops->create_virtual(siblings, count, flags);
}
-struct i915_request *
-intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine)
+static struct i915_request *engine_execlist_find_hung_request(struct intel_engine_cs *engine)
{
struct i915_request *request, *active = NULL;
@@ -2381,6 +2358,33 @@ intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine)
return active;
}
+void intel_engine_get_hung_entity(struct intel_engine_cs *engine,
+ struct intel_context **ce, struct i915_request **rq)
+{
+ unsigned long flags;
+
+ *ce = intel_engine_get_hung_context(engine);
+ if (*ce) {
+ intel_engine_clear_hung_context(engine);
+
+ *rq = intel_context_get_active_request(*ce);
+ return;
+ }
+
+ /*
+ * Getting here with GuC enabled means it is a forced error capture
+ * with no actual hang. So, no need to attempt the execlist search.
+ */
+ if (intel_uc_uses_guc_submission(&engine->gt->uc))
+ return;
+
+ spin_lock_irqsave(&engine->sched_engine->lock, flags);
+ *rq = engine_execlist_find_hung_request(engine);
+ if (*rq)
+ *rq = i915_request_get_rcu(*rq);
+ spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
+}
+
void xehp_enable_ccs_engines(struct intel_engine_cs *engine)
{
/*
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 2daffa7c7dfd..21cb5b69d82e 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -4148,6 +4148,33 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
spin_unlock_irqrestore(&sched_engine->lock, flags);
}
+static unsigned long list_count(struct list_head *list)
+{
+ struct list_head *pos;
+ unsigned long count = 0;
+
+ list_for_each(pos, list)
+ count++;
+
+ return count;
+}
+
+void intel_execlists_dump_active_requests(struct intel_engine_cs *engine,
+ struct i915_request *hung_rq,
+ struct drm_printer *m)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&engine->sched_engine->lock, flags);
+
+ intel_engine_dump_active_requests(&engine->sched_engine->requests, hung_rq, m);
+
+ drm_printf(m, "\tOn hold?: %lu\n",
+ list_count(&engine->sched_engine->hold));
+
+ spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
+}
+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_execlists.c"
#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.h b/drivers/gpu/drm/i915/gt/intel_execlists_submission.h
index a1aa92c983a5..d2c7d45ea062 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.h
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.h
@@ -32,6 +32,10 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
int indent),
unsigned int max);
+void intel_execlists_dump_active_requests(struct intel_engine_cs *engine,
+ struct i915_request *hung_rq,
+ struct drm_printer *m);
+
bool
intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index c3cd92691795..a5454af2a9cf 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -406,10 +406,10 @@
#define GEN9_WM_CHICKEN3 _MMIO(0x5588)
#define GEN9_FACTOR_IN_CLR_VAL_HIZ (1 << 9)
-#define CHICKEN_RASTER_1 _MMIO(0x6204)
+#define CHICKEN_RASTER_1 MCR_REG(0x6204)
#define DIS_SF_ROUND_NEAREST_EVEN REG_BIT(8)
-#define CHICKEN_RASTER_2 _MMIO(0x6208)
+#define CHICKEN_RASTER_2 MCR_REG(0x6208)
#define TBIMR_FAST_CLIP REG_BIT(5)
#define VFLSKPD MCR_REG(0x62a8)
@@ -429,9 +429,10 @@
#define RC_OP_FLUSH_ENABLE (1 << 0)
#define HIZ_RAW_STALL_OPT_DISABLE (1 << 2)
#define CACHE_MODE_1 _MMIO(0x7004) /* IVB+ */
-#define PIXEL_SUBSPAN_COLLECT_OPT_DISABLE (1 << 6)
-#define GEN8_4x4_STC_OPTIMIZATION_DISABLE (1 << 6)
-#define GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE (1 << 1)
+#define MSAA_OPTIMIZATION_REDUC_DISABLE REG_BIT(11)
+#define PIXEL_SUBSPAN_COLLECT_OPT_DISABLE REG_BIT(6)
+#define GEN8_4x4_STC_OPTIMIZATION_DISABLE REG_BIT(6)
+#define GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE REG_BIT(1)
#define GEN7_GT_MODE _MMIO(0x7008)
#define GEN9_IZ_HASHING_MASK(slice) (0x3 << ((slice) * 2))
@@ -457,6 +458,9 @@
#define GEN8_L3CNTLREG _MMIO(0x7034)
#define GEN8_ERRDETBCTRL (1 << 9)
+#define PSS_MODE2 _MMIO(0x703c)
+#define SCOREBOARD_STALL_FLUSH_CONTROL REG_BIT(5)
+
#define GEN7_SC_INSTDONE _MMIO(0x7100)
#define GEN12_SC_INSTDONE_EXTRA _MMIO(0x7104)
#define GEN12_SC_INSTDONE_EXTRA2 _MMIO(0x7108)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 7771a19008c6..bbeeb6dde7ae 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -288,39 +288,6 @@ static const u8 dg2_xcs_offsets[] = {
END
};
-static const u8 mtl_xcs_offsets[] = {
- NOP(1),
- LRI(13, POSTED),
- REG16(0x244),
- REG(0x034),
- REG(0x030),
- REG(0x038),
- REG(0x03c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x1c0),
- REG(0x1c4),
- REG(0x1c8),
- REG(0x180),
- REG16(0x2b4),
- NOP(4),
-
- NOP(1),
- LRI(9, POSTED),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- END
-};
-
static const u8 gen8_rcs_offsets[] = {
NOP(1),
LRI(14, POSTED),
@@ -739,9 +706,7 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine)
else
return gen8_rcs_offsets;
} else {
- if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70))
- return mtl_xcs_offsets;
- else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
return dg2_xcs_offsets;
else if (GRAPHICS_VER(engine->i915) >= 12)
return gen12_xcs_offsets;
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 24736ebee17c..78dc5e493c62 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -278,6 +278,7 @@ out:
static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
{
struct intel_uncore *uncore = gt->uncore;
+ int loops = 2;
int err;
/*
@@ -285,18 +286,39 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
* for fifo space for the write or forcewake the chip for
* the read
*/
- intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask);
+ do {
+ intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask);
- /* Wait for the device to ack the reset requests */
- err = __intel_wait_for_register_fw(uncore,
- GEN6_GDRST, hw_domain_mask, 0,
- 500, 0,
- NULL);
+ /*
+ * Wait for the device to ack the reset requests.
+ *
+ * On some platforms, e.g. Jasperlake, we see that the
+ * engine register state is not cleared until shortly after
+ * GDRST reports completion, causing a failure as we try
+ * to immediately resume while the internal state is still
+ * in flux. If we immediately repeat the reset, the second
+ * reset appears to serialise with the first, and since
+ * it is a no-op, the registers should retain their reset
+ * value. However, there is still a concern that upon
+ * leaving the second reset, the internal engine state
+ * is still in flux and not ready for resuming.
+ */
+ err = __intel_wait_for_register_fw(uncore, GEN6_GDRST,
+ hw_domain_mask, 0,
+ 2000, 0,
+ NULL);
+ } while (err == 0 && --loops);
if (err)
GT_TRACE(gt,
"Wait for 0x%08x engines reset failed\n",
hw_domain_mask);
+ /*
+ * As we have observed that the engine state is still volatile
+ * after GDRST is acked, impose a small delay to let everything settle.
+ */
+ udelay(50);
+
return err;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 2afb4f80a954..a0740308555d 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -645,7 +645,7 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
- wa_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP);
+ wa_mcr_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP);
wa_mcr_write_clr_set(wal, XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f));
wa_mcr_add(wal,
@@ -771,11 +771,19 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
/* Wa_14014947963:dg2 */
if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) ||
- IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915))
+ IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915))
wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000);
+ /* Wa_18018764978:dg2 */
+ if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_C0, STEP_FOREVER) ||
+ IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915))
+ wa_masked_en(wal, PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL);
+
/* Wa_15010599737:dg2 */
- wa_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN);
+ wa_mcr_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN);
+
+ /* Wa_18019271663:dg2 */
+ wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE);
}
static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine,
@@ -1347,6 +1355,13 @@ icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
GAMT_CHKN_BIT_REG,
GAMT_CHKN_DISABLE_L3_COH_PIPE);
+ /*
+ * Wa_1408615072:icl,ehl (vsunit)
+ * Wa_1407596294:icl,ehl (hsunit)
+ */
+ wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
+ VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS);
+
/* Wa_1407352427:icl,ehl */
wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
PSDUNIT_CLKGATE_DIS);
@@ -2532,13 +2547,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN11_ENABLE_32_PLANE_MODE);
/*
- * Wa_1408615072:icl,ehl (vsunit)
- * Wa_1407596294:icl,ehl (hsunit)
- */
- wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
- VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS);
-
- /*
* Wa_1408767742:icl[a2..forever],ehl[all]
* Wa_1605460711:icl[a0..c0]
*/
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 0a42f1807f52..c10977cb06b9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1702,7 +1702,7 @@ static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t st
goto next_context;
guilty = false;
- rq = intel_context_find_active_request(ce);
+ rq = intel_context_get_active_request(ce);
if (!rq) {
head = ce->ring->tail;
goto out_replay;
@@ -1715,6 +1715,7 @@ static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t st
head = intel_ring_wrap(ce->ring, rq->head);
__i915_request_reset(rq, guilty);
+ i915_request_put(rq);
out_replay:
guc_reset_state(ce, head, guilty);
next_context:
@@ -4817,6 +4818,8 @@ void intel_guc_find_hung_context(struct intel_engine_cs *engine)
xa_lock_irqsave(&guc->context_lookup, flags);
xa_for_each(&guc->context_lookup, index, ce) {
+ bool found;
+
if (!kref_get_unless_zero(&ce->ref))
continue;
@@ -4833,10 +4836,18 @@ void intel_guc_find_hung_context(struct intel_engine_cs *engine)
goto next;
}
+ found = false;
+ spin_lock(&ce->guc_state.lock);
list_for_each_entry(rq, &ce->guc_state.requests, sched.link) {
if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE)
continue;
+ found = true;
+ break;
+ }
+ spin_unlock(&ce->guc_state.lock);
+
+ if (found) {
intel_engine_set_hung_context(engine, ce);
/* Can only cope with one hang at a time... */
@@ -4844,6 +4855,7 @@ void intel_guc_find_hung_context(struct intel_engine_cs *engine)
xa_lock(&guc->context_lookup);
goto done;
}
+
next:
intel_context_put(ce);
xa_lock(&guc->context_lookup);
diff --git a/drivers/gpu/drm/i915/gvt/debugfs.c b/drivers/gpu/drm/i915/gvt/debugfs.c
index 9f1c209d9251..0616b73175f3 100644
--- a/drivers/gpu/drm/i915/gvt/debugfs.c
+++ b/drivers/gpu/drm/i915/gvt/debugfs.c
@@ -151,6 +151,22 @@ DEFINE_SIMPLE_ATTRIBUTE(vgpu_scan_nonprivbb_fops,
vgpu_scan_nonprivbb_get, vgpu_scan_nonprivbb_set,
"0x%llx\n");
+static int vgpu_status_get(void *data, u64 *val)
+{
+ struct intel_vgpu *vgpu = (struct intel_vgpu *)data;
+
+ *val = 0;
+
+ if (test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
+ *val |= (1 << INTEL_VGPU_STATUS_ATTACHED);
+ if (test_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status))
+ *val |= (1 << INTEL_VGPU_STATUS_ACTIVE);
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(vgpu_status_fops, vgpu_status_get, NULL, "0x%llx\n");
+
/**
* intel_gvt_debugfs_add_vgpu - register debugfs entries for a vGPU
* @vgpu: a vGPU
@@ -162,11 +178,12 @@ void intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu)
snprintf(name, 16, "vgpu%d", vgpu->id);
vgpu->debugfs = debugfs_create_dir(name, vgpu->gvt->debugfs_root);
- debugfs_create_bool("active", 0444, vgpu->debugfs, &vgpu->active);
debugfs_create_file("mmio_diff", 0444, vgpu->debugfs, vgpu,
&vgpu_mmio_diff_fops);
debugfs_create_file("scan_nonprivbb", 0644, vgpu->debugfs, vgpu,
&vgpu_scan_nonprivbb_fops);
+ debugfs_create_file("status", 0644, vgpu->debugfs, vgpu,
+ &vgpu_status_fops);
}
/**
@@ -175,8 +192,13 @@ void intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu)
*/
void intel_gvt_debugfs_remove_vgpu(struct intel_vgpu *vgpu)
{
- debugfs_remove_recursive(vgpu->debugfs);
- vgpu->debugfs = NULL;
+ struct intel_gvt *gvt = vgpu->gvt;
+ struct drm_minor *minor = gvt->gt->i915->drm.primary;
+
+ if (minor->debugfs_root && gvt->debugfs_root) {
+ debugfs_remove_recursive(vgpu->debugfs);
+ vgpu->debugfs = NULL;
+ }
}
/**
@@ -199,6 +221,10 @@ void intel_gvt_debugfs_init(struct intel_gvt *gvt)
*/
void intel_gvt_debugfs_clean(struct intel_gvt *gvt)
{
- debugfs_remove_recursive(gvt->debugfs_root);
- gvt->debugfs_root = NULL;
+ struct drm_minor *minor = gvt->gt->i915->drm.primary;
+
+ if (minor->debugfs_root) {
+ debugfs_remove_recursive(gvt->debugfs_root);
+ gvt->debugfs_root = NULL;
+ }
}
diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c
index 355f1c0e8664..ffe41e9be04f 100644
--- a/drivers/gpu/drm/i915/gvt/dmabuf.c
+++ b/drivers/gpu/drm/i915/gvt/dmabuf.c
@@ -134,7 +134,8 @@ static void dmabuf_gem_object_free(struct kref *kref)
struct list_head *pos;
struct intel_vgpu_dmabuf_obj *dmabuf_obj;
- if (vgpu && vgpu->active && !list_empty(&vgpu->dmabuf_obj_list_head)) {
+ if (vgpu && test_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status) &&
+ !list_empty(&vgpu->dmabuf_obj_list_head)) {
list_for_each(pos, &vgpu->dmabuf_obj_list_head) {
dmabuf_obj = list_entry(pos, struct intel_vgpu_dmabuf_obj, list);
if (dmabuf_obj == obj) {
diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index 51e5e8fb505b..4ec85308379a 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -55,7 +55,7 @@ static bool intel_gvt_is_valid_gfn(struct intel_vgpu *vgpu, unsigned long gfn)
int idx;
bool ret;
- if (!vgpu->attached)
+ if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
return false;
idx = srcu_read_lock(&kvm->srcu);
@@ -1178,7 +1178,7 @@ static int is_2MB_gtt_possible(struct intel_vgpu *vgpu,
if (!HAS_PAGE_SIZES(vgpu->gvt->gt->i915, I915_GTT_PAGE_SIZE_2M))
return 0;
- if (!vgpu->attached)
+ if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
return -EINVAL;
pfn = gfn_to_pfn(vgpu->vfio_device.kvm, ops->get_pfn(entry));
if (is_error_noslot_pfn(pfn))
@@ -1209,10 +1209,8 @@ static int split_2MB_gtt_entry(struct intel_vgpu *vgpu,
for_each_shadow_entry(sub_spt, &sub_se, sub_index) {
ret = intel_gvt_dma_map_guest_page(vgpu, start_gfn + sub_index,
PAGE_SIZE, &dma_addr);
- if (ret) {
- ppgtt_invalidate_spt(spt);
- return ret;
- }
+ if (ret)
+ goto err;
sub_se.val64 = se->val64;
/* Copy the PAT field from PDE. */
@@ -1231,6 +1229,17 @@ static int split_2MB_gtt_entry(struct intel_vgpu *vgpu,
ops->set_pfn(se, sub_spt->shadow_page.mfn);
ppgtt_set_shadow_entry(spt, se, index);
return 0;
+err:
+ /* Cancel the existing addess mappings of DMA addr. */
+ for_each_present_shadow_entry(sub_spt, &sub_se, sub_index) {
+ gvt_vdbg_mm("invalidate 4K entry\n");
+ ppgtt_invalidate_pte(sub_spt, &sub_se);
+ }
+ /* Release the new allocated spt. */
+ trace_spt_change(sub_spt->vgpu->id, "release", sub_spt,
+ sub_spt->guest_page.gfn, sub_spt->shadow_page.type);
+ ppgtt_free_spt(sub_spt);
+ return ret;
}
static int split_64KB_gtt_entry(struct intel_vgpu *vgpu,
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 62823c0e13ab..2d65800d8e93 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -172,13 +172,18 @@ struct intel_vgpu_submission {
#define KVMGT_DEBUGFS_FILENAME "kvmgt_nr_cache_entries"
+enum {
+ INTEL_VGPU_STATUS_ATTACHED = 0,
+ INTEL_VGPU_STATUS_ACTIVE,
+ INTEL_VGPU_STATUS_NR_BITS,
+};
+
struct intel_vgpu {
struct vfio_device vfio_device;
struct intel_gvt *gvt;
struct mutex vgpu_lock;
int id;
- bool active;
- bool attached;
+ DECLARE_BITMAP(status, INTEL_VGPU_STATUS_NR_BITS);
bool pv_notified;
bool failsafe;
unsigned int resetting_eng;
@@ -467,7 +472,7 @@ void intel_vgpu_write_fence(struct intel_vgpu *vgpu,
#define for_each_active_vgpu(gvt, vgpu, id) \
idr_for_each_entry((&(gvt)->vgpu_idr), (vgpu), (id)) \
- for_each_if(vgpu->active)
+ for_each_if(test_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status))
static inline void intel_vgpu_write_pci_bar(struct intel_vgpu *vgpu,
u32 offset, u32 val, bool low)
@@ -725,7 +730,7 @@ static inline bool intel_gvt_mmio_is_cmd_write_patch(
static inline int intel_gvt_read_gpa(struct intel_vgpu *vgpu, unsigned long gpa,
void *buf, unsigned long len)
{
- if (!vgpu->attached)
+ if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
return -ESRCH;
return vfio_dma_rw(&vgpu->vfio_device, gpa, buf, len, false);
}
@@ -743,7 +748,7 @@ static inline int intel_gvt_read_gpa(struct intel_vgpu *vgpu, unsigned long gpa,
static inline int intel_gvt_write_gpa(struct intel_vgpu *vgpu,
unsigned long gpa, void *buf, unsigned long len)
{
- if (!vgpu->attached)
+ if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
return -ESRCH;
return vfio_dma_rw(&vgpu->vfio_device, gpa, buf, len, true);
}
diff --git a/drivers/gpu/drm/i915/gvt/interrupt.c b/drivers/gpu/drm/i915/gvt/interrupt.c
index a6b2021b665f..68eca023bbc6 100644
--- a/drivers/gpu/drm/i915/gvt/interrupt.c
+++ b/drivers/gpu/drm/i915/gvt/interrupt.c
@@ -433,7 +433,7 @@ static int inject_virtual_interrupt(struct intel_vgpu *vgpu)
* enabled by guest. so if msi_trigger is null, success is still
* returned and don't inject interrupt into guest.
*/
- if (!vgpu->attached)
+ if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
return -ESRCH;
if (vgpu->msi_trigger && eventfd_signal(vgpu->msi_trigger, 1) != 1)
return -EFAULT;
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index f5451adcd489..8ae7039b3683 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -638,7 +638,7 @@ static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
mutex_lock(&vgpu->gvt->lock);
for_each_active_vgpu(vgpu->gvt, itr, id) {
- if (!itr->attached)
+ if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, itr->status))
continue;
if (vgpu->vfio_device.kvm == itr->vfio_device.kvm) {
@@ -655,9 +655,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
{
struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
- if (vgpu->attached)
- return -EEXIST;
-
if (!vgpu->vfio_device.kvm ||
vgpu->vfio_device.kvm->mm != current->mm) {
gvt_vgpu_err("KVM is required to use Intel vGPU\n");
@@ -667,14 +664,14 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
if (__kvmgt_vgpu_exist(vgpu))
return -EEXIST;
- vgpu->attached = true;
-
vgpu->track_node.track_write = kvmgt_page_track_write;
vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
kvm_get_kvm(vgpu->vfio_device.kvm);
kvm_page_track_register_notifier(vgpu->vfio_device.kvm,
&vgpu->track_node);
+ set_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status);
+
debugfs_create_ulong(KVMGT_DEBUGFS_FILENAME, 0444, vgpu->debugfs,
&vgpu->nr_cache_entries);
@@ -698,11 +695,10 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
{
struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
- if (!vgpu->attached)
- return;
-
intel_gvt_release_vgpu(vgpu);
+ clear_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status);
+
debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs));
kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm,
@@ -718,8 +714,6 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
vgpu->dma_addr_cache = RB_ROOT;
intel_vgpu_release_msi_eventfd_ctx(vgpu);
-
- vgpu->attached = false;
}
static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar)
@@ -1512,9 +1506,6 @@ static void intel_vgpu_remove(struct mdev_device *mdev)
{
struct intel_vgpu *vgpu = dev_get_drvdata(&mdev->dev);
- if (WARN_ON_ONCE(vgpu->attached))
- return;
-
vfio_unregister_group_dev(&vgpu->vfio_device);
vfio_put_device(&vgpu->vfio_device);
}
@@ -1559,7 +1550,7 @@ int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)
struct kvm_memory_slot *slot;
int idx;
- if (!info->attached)
+ if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, info->status))
return -ESRCH;
idx = srcu_read_lock(&kvm->srcu);
@@ -1589,8 +1580,8 @@ int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn)
struct kvm_memory_slot *slot;
int idx;
- if (!info->attached)
- return 0;
+ if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, info->status))
+ return -ESRCH;
idx = srcu_read_lock(&kvm->srcu);
slot = gfn_to_memslot(kvm, gfn);
@@ -1668,7 +1659,7 @@ int intel_gvt_dma_map_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
struct gvt_dma *entry;
int ret;
- if (!vgpu->attached)
+ if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
return -EINVAL;
mutex_lock(&vgpu->cache_lock);
@@ -1714,8 +1705,8 @@ int intel_gvt_dma_pin_guest_page(struct intel_vgpu *vgpu, dma_addr_t dma_addr)
struct gvt_dma *entry;
int ret = 0;
- if (!vgpu->attached)
- return -ENODEV;
+ if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
+ return -EINVAL;
mutex_lock(&vgpu->cache_lock);
entry = __gvt_cache_find_dma_addr(vgpu, dma_addr);
@@ -1742,7 +1733,7 @@ void intel_gvt_dma_unmap_guest_page(struct intel_vgpu *vgpu,
{
struct gvt_dma *entry;
- if (!vgpu->attached)
+ if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
return;
mutex_lock(&vgpu->cache_lock);
@@ -1778,7 +1769,7 @@ static void intel_gvt_test_and_emulate_vblank(struct intel_gvt *gvt)
idr_for_each_entry((&(gvt)->vgpu_idr), (vgpu), (id)) {
if (test_and_clear_bit(INTEL_GVT_REQUEST_EMULATE_VBLANK + id,
(void *)&gvt->service_request)) {
- if (vgpu->active)
+ if (test_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status))
intel_vgpu_emulate_vblank(vgpu);
}
}
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 9cd8fcbf7cad..f4055804aad1 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -695,6 +695,7 @@ intel_vgpu_shadow_mm_pin(struct intel_vgpu_workload *workload)
if (workload->shadow_mm->type != INTEL_GVT_MM_PPGTT ||
!workload->shadow_mm->ppgtt_mm.shadowed) {
+ intel_vgpu_unpin_mm(workload->shadow_mm);
gvt_vgpu_err("workload shadow ppgtt isn't ready\n");
return -EINVAL;
}
@@ -865,7 +866,8 @@ pick_next_workload(struct intel_gvt *gvt, struct intel_engine_cs *engine)
goto out;
}
- if (!scheduler->current_vgpu->active ||
+ if (!test_bit(INTEL_VGPU_STATUS_ACTIVE,
+ scheduler->current_vgpu->status) ||
list_empty(workload_q_head(scheduler->current_vgpu, engine)))
goto out;
diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
index 3c529c2705dd..a5497440484f 100644
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -166,9 +166,7 @@ void intel_gvt_clean_vgpu_types(struct intel_gvt *gvt)
*/
void intel_gvt_activate_vgpu(struct intel_vgpu *vgpu)
{
- mutex_lock(&vgpu->vgpu_lock);
- vgpu->active = true;
- mutex_unlock(&vgpu->vgpu_lock);
+ set_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status);
}
/**
@@ -183,7 +181,7 @@ void intel_gvt_deactivate_vgpu(struct intel_vgpu *vgpu)
{
mutex_lock(&vgpu->vgpu_lock);
- vgpu->active = false;
+ clear_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status);
if (atomic_read(&vgpu->submission.running_workload_num)) {
mutex_unlock(&vgpu->vgpu_lock);
@@ -228,7 +226,8 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu)
struct intel_gvt *gvt = vgpu->gvt;
struct drm_i915_private *i915 = gvt->gt->i915;
- drm_WARN(&i915->drm, vgpu->active, "vGPU is still active!\n");
+ drm_WARN(&i915->drm, test_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status),
+ "vGPU is still active!\n");
/*
* remove idr first so later clean can judge if need to stop
@@ -285,8 +284,7 @@ struct intel_vgpu *intel_gvt_create_idle_vgpu(struct intel_gvt *gvt)
if (ret)
goto out_free_vgpu;
- vgpu->active = false;
-
+ clear_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status);
return vgpu;
out_free_vgpu:
diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
index 69103ae37779..61c38fc734cf 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -1069,12 +1069,9 @@ static int i915_driver_open(struct drm_device *dev, struct drm_file *file)
*/
static void i915_driver_lastclose(struct drm_device *dev)
{
- struct drm_i915_private *i915 = to_i915(dev);
-
intel_fbdev_restore_mode(dev);
- if (HAS_DISPLAY(i915))
- vga_switcheroo_process_delayed_switch();
+ vga_switcheroo_process_delayed_switch();
}
static void i915_driver_postclose(struct drm_device *dev, struct drm_file *file)
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 9d5d5a397b64..b20bd6365615 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1596,43 +1596,20 @@ capture_engine(struct intel_engine_cs *engine,
{
struct intel_engine_capture_vma *capture = NULL;
struct intel_engine_coredump *ee;
- struct intel_context *ce;
+ struct intel_context *ce = NULL;
struct i915_request *rq = NULL;
- unsigned long flags;
ee = intel_engine_coredump_alloc(engine, ALLOW_FAIL, dump_flags);
if (!ee)
return NULL;
- ce = intel_engine_get_hung_context(engine);
- if (ce) {
- intel_engine_clear_hung_context(engine);
- rq = intel_context_find_active_request(ce);
- if (!rq || !i915_request_started(rq))
- goto no_request_capture;
- } else {
- /*
- * Getting here with GuC enabled means it is a forced error capture
- * with no actual hang. So, no need to attempt the execlist search.
- */
- if (!intel_uc_uses_guc_submission(&engine->gt->uc)) {
- spin_lock_irqsave(&engine->sched_engine->lock, flags);
- rq = intel_engine_execlist_find_hung_request(engine);
- spin_unlock_irqrestore(&engine->sched_engine->lock,
- flags);
- }
- }
- if (rq)
- rq = i915_request_get_rcu(rq);
-
- if (!rq)
+ intel_engine_get_hung_entity(engine, &ce, &rq);
+ if (!rq || !i915_request_started(rq))
goto no_request_capture;
capture = intel_engine_coredump_add_request(ee, rq, ATOMIC_MAYFAIL);
- if (!capture) {
- i915_request_put(rq);
+ if (!capture)
goto no_request_capture;
- }
if (dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE)
intel_guc_capture_get_matching_node(engine->gt, ee, ce);
@@ -1642,6 +1619,8 @@ capture_engine(struct intel_engine_cs *engine,
return ee;
no_request_capture:
+ if (rq)
+ i915_request_put(rq);
kfree(ee);
return NULL;
}
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index ccd1f864aa19..4fada7ebe8d8 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -423,7 +423,8 @@ static const struct intel_device_info ilk_m_info = {
.has_coherent_ggtt = true, \
.has_llc = 1, \
.has_rc6 = 1, \
- .has_rc6p = 1, \
+ /* snb does support rc6p, but enabling it causes various issues */ \
+ .has_rc6p = 0, \
.has_rps = true, \
.dma_mask_size = 40, \
.__runtime.ppgtt_type = INTEL_PPGTT_ALIASING, \
diff --git a/drivers/gpu/drm/i915/i915_switcheroo.c b/drivers/gpu/drm/i915/i915_switcheroo.c
index 23777d500cdf..f45bd6b6cede 100644
--- a/drivers/gpu/drm/i915/i915_switcheroo.c
+++ b/drivers/gpu/drm/i915/i915_switcheroo.c
@@ -19,6 +19,10 @@ static void i915_switcheroo_set_state(struct pci_dev *pdev,
dev_err(&pdev->dev, "DRM not initialized, aborting switch.\n");
return;
}
+ if (!HAS_DISPLAY(i915)) {
+ dev_err(&pdev->dev, "Device state not initialized, aborting switch.\n");
+ return;
+ }
if (state == VGA_SWITCHEROO_ON) {
drm_info(&i915->drm, "switched on\n");
@@ -44,7 +48,7 @@ static bool i915_switcheroo_can_switch(struct pci_dev *pdev)
* locking inversion with the driver load path. And the access here is
* completely racy anyway. So don't bother with locking for now.
*/
- return i915 && atomic_read(&i915->drm.open_count) == 0;
+ return i915 && HAS_DISPLAY(i915) && atomic_read(&i915->drm.open_count) == 0;
}
static const struct vga_switcheroo_client_ops i915_switcheroo_ops = {
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 3a33be5401ed..135390d975b6 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -2116,7 +2116,7 @@ int i915_vma_unbind_async(struct i915_vma *vma, bool trylock_vm)
if (!obj->mm.rsgt)
return -EBUSY;
- err = dma_resv_reserve_fences(obj->base.resv, 1);
+ err = dma_resv_reserve_fences(obj->base.resv, 2);
if (err)
return -EBUSY;
diff --git a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c
index 310fb83c527e..2990dd4d4a0d 100644
--- a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c
+++ b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c
@@ -28,8 +28,7 @@ struct intel_engine_cs *intel_selftest_find_any_engine(struct intel_gt *gt)
int intel_selftest_modify_policy(struct intel_engine_cs *engine,
struct intel_selftest_saved_policy *saved,
- u32 modify_type)
-
+ enum selftest_scheduler_modify modify_type)
{
int err;
diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c
index dba4f7d81d69..80142d9a4a55 100644
--- a/drivers/gpu/drm/imx/ipuv3-plane.c
+++ b/drivers/gpu/drm/imx/ipuv3-plane.c
@@ -614,6 +614,11 @@ static void ipu_plane_atomic_update(struct drm_plane *plane,
break;
}
+ if (ipu_plane->dp_flow == IPU_DP_FLOW_SYNC_BG)
+ width = ipu_src_rect_width(new_state);
+ else
+ width = drm_rect_width(&new_state->src) >> 16;
+
eba = drm_plane_state_to_eba(new_state, 0);
/*
@@ -622,8 +627,7 @@ static void ipu_plane_atomic_update(struct drm_plane *plane,
*/
if (ipu_state->use_pre) {
axi_id = ipu_chan_assign_axi_id(ipu_plane->dma);
- ipu_prg_channel_configure(ipu_plane->ipu_ch, axi_id,
- ipu_src_rect_width(new_state),
+ ipu_prg_channel_configure(ipu_plane->ipu_ch, axi_id, width,
drm_rect_height(&new_state->src) >> 16,
fb->pitches[0], fb->format->format,
fb->modifier, &eba);
@@ -678,9 +682,8 @@ static void ipu_plane_atomic_update(struct drm_plane *plane,
break;
}
- ipu_dmfc_config_wait4eot(ipu_plane->dmfc, ALIGN(drm_rect_width(dst), 8));
+ ipu_dmfc_config_wait4eot(ipu_plane->dmfc, width);
- width = ipu_src_rect_width(new_state);
height = drm_rect_height(&new_state->src) >> 16;
info = drm_format_info(fb->format->format);
ipu_calculate_bursts(width, info->cpp[0], fb->pitches[0],
@@ -744,8 +747,7 @@ static void ipu_plane_atomic_update(struct drm_plane *plane,
ipu_cpmem_set_burstsize(ipu_plane->ipu_ch, 16);
ipu_cpmem_zero(ipu_plane->alpha_ch);
- ipu_cpmem_set_resolution(ipu_plane->alpha_ch,
- ipu_src_rect_width(new_state),
+ ipu_cpmem_set_resolution(ipu_plane->alpha_ch, width,
drm_rect_height(&new_state->src) >> 16);
ipu_cpmem_set_format_passthrough(ipu_plane->alpha_ch, 8);
ipu_cpmem_set_high_priority(ipu_plane->alpha_ch);
diff --git a/drivers/gpu/drm/meson/meson_viu.c b/drivers/gpu/drm/meson/meson_viu.c
index d4b907889a21..cd399b0b7181 100644
--- a/drivers/gpu/drm/meson/meson_viu.c
+++ b/drivers/gpu/drm/meson/meson_viu.c
@@ -436,15 +436,14 @@ void meson_viu_init(struct meson_drm *priv)
/* Initialize OSD1 fifo control register */
reg = VIU_OSD_DDR_PRIORITY_URGENT |
- VIU_OSD_HOLD_FIFO_LINES(31) |
VIU_OSD_FIFO_DEPTH_VAL(32) | /* fifo_depth_val: 32*8=256 */
VIU_OSD_WORDS_PER_BURST(4) | /* 4 words in 1 burst */
VIU_OSD_FIFO_LIMITS(2); /* fifo_lim: 2*16=32 */
if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A))
- reg |= VIU_OSD_BURST_LENGTH_32;
+ reg |= (VIU_OSD_BURST_LENGTH_32 | VIU_OSD_HOLD_FIFO_LINES(31));
else
- reg |= VIU_OSD_BURST_LENGTH_64;
+ reg |= (VIU_OSD_BURST_LENGTH_64 | VIU_OSD_HOLD_FIFO_LINES(4));
writel_relaxed(reg, priv->io_base + _REG(VIU_OSD1_FIFO_CTRL_STAT));
writel_relaxed(reg, priv->io_base + _REG(VIU_OSD2_FIFO_CTRL_STAT));
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 6484b97c5344..f3c9600221d4 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -876,7 +876,8 @@ static void a6xx_gmu_rpmh_off(struct a6xx_gmu *gmu)
#define GBIF_CLIENT_HALT_MASK BIT(0)
#define GBIF_ARB_HALT_MASK BIT(1)
-static void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu)
+static void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu,
+ bool gx_off)
{
struct msm_gpu *gpu = &adreno_gpu->base;
@@ -889,9 +890,11 @@ static void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu)
return;
}
- /* Halt the gx side of GBIF */
- gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 1);
- spin_until(gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT_ACK) & 1);
+ if (gx_off) {
+ /* Halt the gx side of GBIF */
+ gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 1);
+ spin_until(gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT_ACK) & 1);
+ }
/* Halt new client requests on GBIF */
gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK);
@@ -929,7 +932,7 @@ static void a6xx_gmu_force_off(struct a6xx_gmu *gmu)
/* Halt the gmu cm3 core */
gmu_write(gmu, REG_A6XX_GMU_CM3_SYSRESET, 1);
- a6xx_bus_clear_pending_transactions(adreno_gpu);
+ a6xx_bus_clear_pending_transactions(adreno_gpu, true);
/* Reset GPU core blocks */
gpu_write(gpu, REG_A6XX_RBBM_SW_RESET_CMD, 1);
@@ -1083,7 +1086,7 @@ static void a6xx_gmu_shutdown(struct a6xx_gmu *gmu)
return;
}
- a6xx_bus_clear_pending_transactions(adreno_gpu);
+ a6xx_bus_clear_pending_transactions(adreno_gpu, a6xx_gpu->hung);
/* tell the GMU we want to slumber */
ret = a6xx_gmu_notify_slumber(gmu);
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 36c8fb699b56..3be0f2928b57 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -1270,6 +1270,12 @@ static void a6xx_recover(struct msm_gpu *gpu)
if (hang_debug)
a6xx_dump(gpu);
+ /*
+ * To handle recovery specific sequences during the rpm suspend we are
+ * about to trigger
+ */
+ a6xx_gpu->hung = true;
+
/* Halt SQE first */
gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3);
@@ -1312,6 +1318,7 @@ static void a6xx_recover(struct msm_gpu *gpu)
mutex_unlock(&gpu->active_lock);
msm_gpu_hw_init(gpu);
+ a6xx_gpu->hung = false;
}
static const char *a6xx_uche_fault_block(struct msm_gpu *gpu, u32 mid)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
index ab853f61db63..eea2e60ce3b7 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
@@ -32,6 +32,7 @@ struct a6xx_gpu {
void *llc_slice;
void *htw_llc_slice;
bool have_mmu500;
+ bool hung;
};
#define to_a6xx_gpu(x) container_of(x, struct a6xx_gpu, base)
diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c
index 628806423f7d..36f062c7582f 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -551,13 +551,14 @@ static int adreno_bind(struct device *dev, struct device *master, void *data)
return 0;
}
+static int adreno_system_suspend(struct device *dev);
static void adreno_unbind(struct device *dev, struct device *master,
void *data)
{
struct msm_drm_private *priv = dev_get_drvdata(master);
struct msm_gpu *gpu = dev_to_gpu(dev);
- pm_runtime_force_suspend(dev);
+ WARN_ON_ONCE(adreno_system_suspend(dev));
gpu->funcs->destroy(gpu);
priv->gpu_pdev = NULL;
@@ -609,7 +610,7 @@ static int adreno_remove(struct platform_device *pdev)
static void adreno_shutdown(struct platform_device *pdev)
{
- pm_runtime_force_suspend(&pdev->dev);
+ WARN_ON_ONCE(adreno_system_suspend(&pdev->dev));
}
static const struct of_device_id dt_match[] = {
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 57586c794b84..3605f095b2de 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -352,6 +352,8 @@ int adreno_set_param(struct msm_gpu *gpu, struct msm_file_private *ctx,
/* Ensure string is null terminated: */
str[len] = '\0';
+ mutex_lock(&gpu->lock);
+
if (param == MSM_PARAM_COMM) {
paramp = &ctx->comm;
} else {
@@ -361,6 +363,8 @@ int adreno_set_param(struct msm_gpu *gpu, struct msm_file_private *ctx,
kfree(*paramp);
*paramp = str;
+ mutex_unlock(&gpu->lock);
+
return 0;
}
case MSM_PARAM_SYSPROF:
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index 5d4b1c95033f..b4f9b1343d63 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -29,11 +29,9 @@ enum {
ADRENO_FW_MAX,
};
-enum adreno_quirks {
- ADRENO_QUIRK_TWO_PASS_USE_WFI = 1,
- ADRENO_QUIRK_FAULT_DETECT_MASK = 2,
- ADRENO_QUIRK_LMLOADKILL_DISABLE = 3,
-};
+#define ADRENO_QUIRK_TWO_PASS_USE_WFI BIT(0)
+#define ADRENO_QUIRK_FAULT_DETECT_MASK BIT(1)
+#define ADRENO_QUIRK_LMLOADKILL_DISABLE BIT(2)
struct adreno_rev {
uint8_t core;
@@ -65,7 +63,7 @@ struct adreno_info {
const char *name;
const char *fw[ADRENO_FW_MAX];
uint32_t gmem;
- enum adreno_quirks quirks;
+ u64 quirks;
struct msm_gpu *(*init)(struct drm_device *dev);
const char *zapfw;
u32 inactive_period;
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
index 7cbcef6efe17..62f6ff6abf41 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
@@ -132,7 +132,6 @@ static void dpu_encoder_phys_wb_set_qos(struct dpu_encoder_phys *phys_enc)
* dpu_encoder_phys_wb_setup_fb - setup output framebuffer
* @phys_enc: Pointer to physical encoder
* @fb: Pointer to output framebuffer
- * @wb_roi: Pointer to output region of interest
*/
static void dpu_encoder_phys_wb_setup_fb(struct dpu_encoder_phys *phys_enc,
struct drm_framebuffer *fb)
@@ -692,7 +691,7 @@ static void dpu_encoder_phys_wb_init_ops(struct dpu_encoder_phys_ops *ops)
/**
* dpu_encoder_phys_wb_init - initialize writeback encoder
- * @init: Pointer to init info structure with initialization params
+ * @p: Pointer to init info structure with initialization params
*/
struct dpu_encoder_phys *dpu_encoder_phys_wb_init(
struct dpu_enc_phys_init_params *p)
diff --git a/drivers/gpu/drm/msm/dp/dp_aux.c b/drivers/gpu/drm/msm/dp/dp_aux.c
index d030a93a08c3..cc3efed593aa 100644
--- a/drivers/gpu/drm/msm/dp/dp_aux.c
+++ b/drivers/gpu/drm/msm/dp/dp_aux.c
@@ -423,6 +423,10 @@ void dp_aux_isr(struct drm_dp_aux *dp_aux)
isr = dp_catalog_aux_get_irq(aux->catalog);
+ /* no interrupts pending, return immediately */
+ if (!isr)
+ return;
+
if (!aux->cmd_busy)
return;
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c
index 4d3fdc806bef..97372bb241d8 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi.c
@@ -532,11 +532,19 @@ static int msm_hdmi_dev_probe(struct platform_device *pdev)
ret = devm_pm_runtime_enable(&pdev->dev);
if (ret)
- return ret;
+ goto err_put_phy;
platform_set_drvdata(pdev, hdmi);
- return component_add(&pdev->dev, &msm_hdmi_ops);
+ ret = component_add(&pdev->dev, &msm_hdmi_ops);
+ if (ret)
+ goto err_put_phy;
+
+ return 0;
+
+err_put_phy:
+ msm_hdmi_put_phy(hdmi);
+ return ret;
}
static int msm_hdmi_dev_remove(struct platform_device *pdev)
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 8b0b0ac74a6f..45e81eb148a8 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -1278,7 +1278,7 @@ void msm_drv_shutdown(struct platform_device *pdev)
* msm_drm_init, drm_dev->registered is used as an indicator that the
* shutdown will be successful.
*/
- if (drm && drm->registered)
+ if (drm && drm->registered && priv->kms)
drm_atomic_helper_shutdown(drm);
}
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 30ed45af76ad..380249500325 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -335,6 +335,8 @@ static void get_comm_cmdline(struct msm_gem_submit *submit, char **comm, char **
struct msm_file_private *ctx = submit->queue->ctx;
struct task_struct *task;
+ WARN_ON(!mutex_is_locked(&submit->gpu->lock));
+
/* Note that kstrdup will return NULL if argument is NULL: */
*comm = kstrdup(ctx->comm, GFP_KERNEL);
*cmd = kstrdup(ctx->cmdline, GFP_KERNEL);
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 651786bc55e5..732295e25683 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -376,10 +376,18 @@ struct msm_file_private {
*/
int sysprof;
- /** comm: Overridden task comm, see MSM_PARAM_COMM */
+ /**
+ * comm: Overridden task comm, see MSM_PARAM_COMM
+ *
+ * Accessed under msm_gpu::lock
+ */
char *comm;
- /** cmdline: Overridden task cmdline, see MSM_PARAM_CMDLINE */
+ /**
+ * cmdline: Overridden task cmdline, see MSM_PARAM_CMDLINE
+ *
+ * Accessed under msm_gpu::lock
+ */
char *cmdline;
/**
diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c
index 86b28add1fff..2527afef9c19 100644
--- a/drivers/gpu/drm/msm/msm_mdss.c
+++ b/drivers/gpu/drm/msm/msm_mdss.c
@@ -47,15 +47,17 @@ struct msm_mdss {
static int msm_mdss_parse_data_bus_icc_path(struct device *dev,
struct msm_mdss *msm_mdss)
{
- struct icc_path *path0 = of_icc_get(dev, "mdp0-mem");
- struct icc_path *path1 = of_icc_get(dev, "mdp1-mem");
+ struct icc_path *path0;
+ struct icc_path *path1;
+ path0 = of_icc_get(dev, "mdp0-mem");
if (IS_ERR_OR_NULL(path0))
return PTR_ERR_OR_ZERO(path0);
msm_mdss->path[0] = path0;
msm_mdss->num_paths = 1;
+ path1 = of_icc_get(dev, "mdp1-mem");
if (!IS_ERR_OR_NULL(path1)) {
msm_mdss->path[1] = path1;
msm_mdss->num_paths++;
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
index 40768373cdd9..c5a4f49ee206 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
@@ -97,6 +97,7 @@ int gp100_fb_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct n
int gp102_fb_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fb **);
int gp10b_fb_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fb **);
int gv100_fb_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fb **);
+int tu102_fb_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fb **);
int ga100_fb_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fb **);
int ga102_fb_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fb **);
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
deleted file mode 100644
index e87de7906f78..000000000000
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ /dev/null
@@ -1,613 +0,0 @@
-/*
- * Copyright © 2007 David Airlie
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * David Airlie
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/tty.h>
-#include <linux/sysrq.h>
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/screen_info.h>
-#include <linux/vga_switcheroo.h>
-#include <linux/console.h>
-
-#include <drm/drm_crtc.h>
-#include <drm/drm_crtc_helper.h>
-#include <drm/drm_probe_helper.h>
-#include <drm/drm_fb_helper.h>
-#include <drm/drm_fourcc.h>
-#include <drm/drm_atomic.h>
-
-#include "nouveau_drv.h"
-#include "nouveau_gem.h"
-#include "nouveau_bo.h"
-#include "nouveau_fbcon.h"
-#include "nouveau_chan.h"
-#include "nouveau_vmm.h"
-
-#include "nouveau_crtc.h"
-
-MODULE_PARM_DESC(nofbaccel, "Disable fbcon acceleration");
-int nouveau_nofbaccel = 0;
-module_param_named(nofbaccel, nouveau_nofbaccel, int, 0400);
-
-MODULE_PARM_DESC(fbcon_bpp, "fbcon bits-per-pixel (default: auto)");
-static int nouveau_fbcon_bpp;
-module_param_named(fbcon_bpp, nouveau_fbcon_bpp, int, 0400);
-
-static void
-nouveau_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
-{
- struct nouveau_fbdev *fbcon = info->par;
- struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev);
- struct nvif_device *device = &drm->client.device;
- int ret;
-
- if (info->state != FBINFO_STATE_RUNNING)
- return;
-
- ret = -ENODEV;
- if (!in_interrupt() && !(info->flags & FBINFO_HWACCEL_DISABLED) &&
- mutex_trylock(&drm->client.mutex)) {
- if (device->info.family < NV_DEVICE_INFO_V0_TESLA)
- ret = nv04_fbcon_fillrect(info, rect);
- else
- if (device->info.family < NV_DEVICE_INFO_V0_FERMI)
- ret = nv50_fbcon_fillrect(info, rect);
- else
- ret = nvc0_fbcon_fillrect(info, rect);
- mutex_unlock(&drm->client.mutex);
- }
-
- if (ret == 0)
- return;
-
- if (ret != -ENODEV)
- nouveau_fbcon_gpu_lockup(info);
- drm_fb_helper_cfb_fillrect(info, rect);
-}
-
-static void
-nouveau_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *image)
-{
- struct nouveau_fbdev *fbcon = info->par;
- struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev);
- struct nvif_device *device = &drm->client.device;
- int ret;
-
- if (info->state != FBINFO_STATE_RUNNING)
- return;
-
- ret = -ENODEV;
- if (!in_interrupt() && !(info->flags & FBINFO_HWACCEL_DISABLED) &&
- mutex_trylock(&drm->client.mutex)) {
- if (device->info.family < NV_DEVICE_INFO_V0_TESLA)
- ret = nv04_fbcon_copyarea(info, image);
- else
- if (device->info.family < NV_DEVICE_INFO_V0_FERMI)
- ret = nv50_fbcon_copyarea(info, image);
- else
- ret = nvc0_fbcon_copyarea(info, image);
- mutex_unlock(&drm->client.mutex);
- }
-
- if (ret == 0)
- return;
-
- if (ret != -ENODEV)
- nouveau_fbcon_gpu_lockup(info);
- drm_fb_helper_cfb_copyarea(info, image);
-}
-
-static void
-nouveau_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
-{
- struct nouveau_fbdev *fbcon = info->par;
- struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev);
- struct nvif_device *device = &drm->client.device;
- int ret;
-
- if (info->state != FBINFO_STATE_RUNNING)
- return;
-
- ret = -ENODEV;
- if (!in_interrupt() && !(info->flags & FBINFO_HWACCEL_DISABLED) &&
- mutex_trylock(&drm->client.mutex)) {
- if (device->info.family < NV_DEVICE_INFO_V0_TESLA)
- ret = nv04_fbcon_imageblit(info, image);
- else
- if (device->info.family < NV_DEVICE_INFO_V0_FERMI)
- ret = nv50_fbcon_imageblit(info, image);
- else
- ret = nvc0_fbcon_imageblit(info, image);
- mutex_unlock(&drm->client.mutex);
- }
-
- if (ret == 0)
- return;
-
- if (ret != -ENODEV)
- nouveau_fbcon_gpu_lockup(info);
- drm_fb_helper_cfb_imageblit(info, image);
-}
-
-static int
-nouveau_fbcon_sync(struct fb_info *info)
-{
- struct nouveau_fbdev *fbcon = info->par;
- struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev);
- struct nouveau_channel *chan = drm->channel;
- int ret;
-
- if (!chan || !chan->accel_done || in_interrupt() ||
- info->state != FBINFO_STATE_RUNNING ||
- info->flags & FBINFO_HWACCEL_DISABLED)
- return 0;
-
- if (!mutex_trylock(&drm->client.mutex))
- return 0;
-
- ret = nouveau_channel_idle(chan);
- mutex_unlock(&drm->client.mutex);
- if (ret) {
- nouveau_fbcon_gpu_lockup(info);
- return 0;
- }
-
- chan->accel_done = false;
- return 0;
-}
-
-static int
-nouveau_fbcon_open(struct fb_info *info, int user)
-{
- struct nouveau_fbdev *fbcon = info->par;
- struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev);
- int ret = pm_runtime_get_sync(drm->dev->dev);
- if (ret < 0 && ret != -EACCES) {
- pm_runtime_put(drm->dev->dev);
- return ret;
- }
- return 0;
-}
-
-static int
-nouveau_fbcon_release(struct fb_info *info, int user)
-{
- struct nouveau_fbdev *fbcon = info->par;
- struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev);
- pm_runtime_put(drm->dev->dev);
- return 0;
-}
-
-static const struct fb_ops nouveau_fbcon_ops = {
- .owner = THIS_MODULE,
- DRM_FB_HELPER_DEFAULT_OPS,
- .fb_open = nouveau_fbcon_open,
- .fb_release = nouveau_fbcon_release,
- .fb_fillrect = nouveau_fbcon_fillrect,
- .fb_copyarea = nouveau_fbcon_copyarea,
- .fb_imageblit = nouveau_fbcon_imageblit,
- .fb_sync = nouveau_fbcon_sync,
-};
-
-static const struct fb_ops nouveau_fbcon_sw_ops = {
- .owner = THIS_MODULE,
- DRM_FB_HELPER_DEFAULT_OPS,
- .fb_open = nouveau_fbcon_open,
- .fb_release = nouveau_fbcon_release,
- .fb_fillrect = drm_fb_helper_cfb_fillrect,
- .fb_copyarea = drm_fb_helper_cfb_copyarea,
- .fb_imageblit = drm_fb_helper_cfb_imageblit,
-};
-
-void
-nouveau_fbcon_accel_save_disable(struct drm_device *dev)
-{
- struct nouveau_drm *drm = nouveau_drm(dev);
- if (drm->fbcon && drm->fbcon->helper.info) {
- drm->fbcon->saved_flags = drm->fbcon->helper.info->flags;
- drm->fbcon->helper.info->flags |= FBINFO_HWACCEL_DISABLED;
- }
-}
-
-void
-nouveau_fbcon_accel_restore(struct drm_device *dev)
-{
- struct nouveau_drm *drm = nouveau_drm(dev);
- if (drm->fbcon && drm->fbcon->helper.info)
- drm->fbcon->helper.info->flags = drm->fbcon->saved_flags;
-}
-
-static void
-nouveau_fbcon_accel_fini(struct drm_device *dev)
-{
- struct nouveau_drm *drm = nouveau_drm(dev);
- struct nouveau_fbdev *fbcon = drm->fbcon;
- if (fbcon && drm->channel) {
- console_lock();
- if (fbcon->helper.info)
- fbcon->helper.info->flags |= FBINFO_HWACCEL_DISABLED;
- console_unlock();
- nouveau_channel_idle(drm->channel);
- nvif_object_dtor(&fbcon->twod);
- nvif_object_dtor(&fbcon->blit);
- nvif_object_dtor(&fbcon->gdi);
- nvif_object_dtor(&fbcon->patt);
- nvif_object_dtor(&fbcon->rop);
- nvif_object_dtor(&fbcon->clip);
- nvif_object_dtor(&fbcon->surf2d);
- }
-}
-
-static void
-nouveau_fbcon_accel_init(struct drm_device *dev)
-{
- struct nouveau_drm *drm = nouveau_drm(dev);
- struct nouveau_fbdev *fbcon = drm->fbcon;
- struct fb_info *info = fbcon->helper.info;
- int ret;
-
- if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA)
- ret = nv04_fbcon_accel_init(info);
- else
- if (drm->client.device.info.family < NV_DEVICE_INFO_V0_FERMI)
- ret = nv50_fbcon_accel_init(info);
- else
- ret = nvc0_fbcon_accel_init(info);
-
- if (ret == 0)
- info->fbops = &nouveau_fbcon_ops;
-}
-
-static void
-nouveau_fbcon_zfill(struct drm_device *dev, struct nouveau_fbdev *fbcon)
-{
- struct fb_info *info = fbcon->helper.info;
- struct fb_fillrect rect;
-
- /* Clear the entire fbcon. The drm will program every connector
- * with it's preferred mode. If the sizes differ, one display will
- * quite likely have garbage around the console.
- */
- rect.dx = rect.dy = 0;
- rect.width = info->var.xres_virtual;
- rect.height = info->var.yres_virtual;
- rect.color = 0;
- rect.rop = ROP_COPY;
- info->fbops->fb_fillrect(info, &rect);
-}
-
-static int
-nouveau_fbcon_create(struct drm_fb_helper *helper,
- struct drm_fb_helper_surface_size *sizes)
-{
- struct nouveau_fbdev *fbcon =
- container_of(helper, struct nouveau_fbdev, helper);
- struct drm_device *dev = fbcon->helper.dev;
- struct nouveau_drm *drm = nouveau_drm(dev);
- struct nvif_device *device = &drm->client.device;
- struct fb_info *info;
- struct drm_framebuffer *fb;
- struct nouveau_channel *chan;
- struct nouveau_bo *nvbo;
- struct drm_mode_fb_cmd2 mode_cmd = {};
- int ret;
-
- mode_cmd.width = sizes->surface_width;
- mode_cmd.height = sizes->surface_height;
-
- mode_cmd.pitches[0] = mode_cmd.width * (sizes->surface_bpp >> 3);
- mode_cmd.pitches[0] = roundup(mode_cmd.pitches[0], 256);
-
- mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
- sizes->surface_depth);
-
- ret = nouveau_gem_new(&drm->client, mode_cmd.pitches[0] *
- mode_cmd.height, 0, NOUVEAU_GEM_DOMAIN_VRAM,
- 0, 0x0000, &nvbo);
- if (ret) {
- NV_ERROR(drm, "failed to allocate framebuffer\n");
- goto out;
- }
-
- ret = nouveau_framebuffer_new(dev, &mode_cmd, &nvbo->bo.base, &fb);
- if (ret)
- goto out_unref;
-
- ret = nouveau_bo_pin(nvbo, NOUVEAU_GEM_DOMAIN_VRAM, false);
- if (ret) {
- NV_ERROR(drm, "failed to pin fb: %d\n", ret);
- goto out_unref;
- }
-
- ret = nouveau_bo_map(nvbo);
- if (ret) {
- NV_ERROR(drm, "failed to map fb: %d\n", ret);
- goto out_unpin;
- }
-
- chan = nouveau_nofbaccel ? NULL : drm->channel;
- if (chan && device->info.family >= NV_DEVICE_INFO_V0_TESLA) {
- ret = nouveau_vma_new(nvbo, chan->vmm, &fbcon->vma);
- if (ret) {
- NV_ERROR(drm, "failed to map fb into chan: %d\n", ret);
- chan = NULL;
- }
- }
-
- info = drm_fb_helper_alloc_info(helper);
- if (IS_ERR(info)) {
- ret = PTR_ERR(info);
- goto out_unlock;
- }
-
- /* setup helper */
- fbcon->helper.fb = fb;
-
- if (!chan)
- info->flags = FBINFO_HWACCEL_DISABLED;
- else
- info->flags = FBINFO_HWACCEL_COPYAREA |
- FBINFO_HWACCEL_FILLRECT |
- FBINFO_HWACCEL_IMAGEBLIT;
- info->fbops = &nouveau_fbcon_sw_ops;
- info->fix.smem_start = nvbo->bo.resource->bus.offset;
- info->fix.smem_len = nvbo->bo.base.size;
-
- info->screen_base = nvbo_kmap_obj_iovirtual(nvbo);
- info->screen_size = nvbo->bo.base.size;
-
- drm_fb_helper_fill_info(info, &fbcon->helper, sizes);
-
- /* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */
-
- if (chan)
- nouveau_fbcon_accel_init(dev);
- nouveau_fbcon_zfill(dev, fbcon);
-
- /* To allow resizeing without swapping buffers */
- NV_INFO(drm, "allocated %dx%d fb: 0x%llx, bo %p\n",
- fb->width, fb->height, nvbo->offset, nvbo);
-
- if (dev_is_pci(dev->dev))
- vga_switcheroo_client_fb_set(to_pci_dev(dev->dev), info);
-
- return 0;
-
-out_unlock:
- if (chan)
- nouveau_vma_del(&fbcon->vma);
- nouveau_bo_unmap(nvbo);
-out_unpin:
- nouveau_bo_unpin(nvbo);
-out_unref:
- nouveau_bo_ref(NULL, &nvbo);
-out:
- return ret;
-}
-
-static int
-nouveau_fbcon_destroy(struct drm_device *dev, struct nouveau_fbdev *fbcon)
-{
- struct drm_framebuffer *fb = fbcon->helper.fb;
- struct nouveau_bo *nvbo;
-
- drm_fb_helper_unregister_info(&fbcon->helper);
- drm_fb_helper_fini(&fbcon->helper);
-
- if (fb && fb->obj[0]) {
- nvbo = nouveau_gem_object(fb->obj[0]);
- nouveau_vma_del(&fbcon->vma);
- nouveau_bo_unmap(nvbo);
- nouveau_bo_unpin(nvbo);
- drm_framebuffer_put(fb);
- }
-
- return 0;
-}
-
-void nouveau_fbcon_gpu_lockup(struct fb_info *info)
-{
- struct nouveau_fbdev *fbcon = info->par;
- struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev);
-
- NV_ERROR(drm, "GPU lockup - switching to software fbcon\n");
- info->flags |= FBINFO_HWACCEL_DISABLED;
-}
-
-static const struct drm_fb_helper_funcs nouveau_fbcon_helper_funcs = {
- .fb_probe = nouveau_fbcon_create,
-};
-
-static void
-nouveau_fbcon_set_suspend_work(struct work_struct *work)
-{
- struct nouveau_drm *drm = container_of(work, typeof(*drm), fbcon_work);
- int state = READ_ONCE(drm->fbcon_new_state);
-
- if (state == FBINFO_STATE_RUNNING)
- pm_runtime_get_sync(drm->dev->dev);
-
- console_lock();
- if (state == FBINFO_STATE_RUNNING)
- nouveau_fbcon_accel_restore(drm->dev);
- drm_fb_helper_set_suspend(&drm->fbcon->helper, state);
- if (state != FBINFO_STATE_RUNNING)
- nouveau_fbcon_accel_save_disable(drm->dev);
- console_unlock();
-
- if (state == FBINFO_STATE_RUNNING) {
- nouveau_fbcon_hotplug_resume(drm->fbcon);
- pm_runtime_mark_last_busy(drm->dev->dev);
- pm_runtime_put_autosuspend(drm->dev->dev);
- }
-}
-
-void
-nouveau_fbcon_set_suspend(struct drm_device *dev, int state)
-{
- struct nouveau_drm *drm = nouveau_drm(dev);
-
- if (!drm->fbcon)
- return;
-
- drm->fbcon_new_state = state;
- /* Since runtime resume can happen as a result of a sysfs operation,
- * it's possible we already have the console locked. So handle fbcon
- * init/deinit from a seperate work thread
- */
- schedule_work(&drm->fbcon_work);
-}
-
-void
-nouveau_fbcon_output_poll_changed(struct drm_device *dev)
-{
- struct nouveau_drm *drm = nouveau_drm(dev);
- struct nouveau_fbdev *fbcon = drm->fbcon;
- int ret;
-
- if (!fbcon)
- return;
-
- mutex_lock(&fbcon->hotplug_lock);
-
- ret = pm_runtime_get(dev->dev);
- if (ret == 1 || ret == -EACCES) {
- drm_fb_helper_hotplug_event(&fbcon->helper);
-
- pm_runtime_mark_last_busy(dev->dev);
- pm_runtime_put_autosuspend(dev->dev);
- } else if (ret == 0) {
- /* If the GPU was already in the process of suspending before
- * this event happened, then we can't block here as we'll
- * deadlock the runtime pmops since they wait for us to
- * finish. So, just defer this event for when we runtime
- * resume again. It will be handled by fbcon_work.
- */
- NV_DEBUG(drm, "fbcon HPD event deferred until runtime resume\n");
- fbcon->hotplug_waiting = true;
- pm_runtime_put_noidle(drm->dev->dev);
- } else {
- DRM_WARN("fbcon HPD event lost due to RPM failure: %d\n",
- ret);
- }
-
- mutex_unlock(&fbcon->hotplug_lock);
-}
-
-void
-nouveau_fbcon_hotplug_resume(struct nouveau_fbdev *fbcon)
-{
- struct nouveau_drm *drm;
-
- if (!fbcon)
- return;
- drm = nouveau_drm(fbcon->helper.dev);
-
- mutex_lock(&fbcon->hotplug_lock);
- if (fbcon->hotplug_waiting) {
- fbcon->hotplug_waiting = false;
-
- NV_DEBUG(drm, "Handling deferred fbcon HPD events\n");
- drm_fb_helper_hotplug_event(&fbcon->helper);
- }
- mutex_unlock(&fbcon->hotplug_lock);
-}
-
-int
-nouveau_fbcon_init(struct drm_device *dev)
-{
- struct nouveau_drm *drm = nouveau_drm(dev);
- struct nouveau_fbdev *fbcon;
- int preferred_bpp = nouveau_fbcon_bpp;
- int ret;
-
- if (!dev->mode_config.num_crtc ||
- (to_pci_dev(dev->dev)->class >> 8) != PCI_CLASS_DISPLAY_VGA)
- return 0;
-
- fbcon = kzalloc(sizeof(struct nouveau_fbdev), GFP_KERNEL);
- if (!fbcon)
- return -ENOMEM;
-
- drm->fbcon = fbcon;
- INIT_WORK(&drm->fbcon_work, nouveau_fbcon_set_suspend_work);
- mutex_init(&fbcon->hotplug_lock);
-
- drm_fb_helper_prepare(dev, &fbcon->helper, &nouveau_fbcon_helper_funcs);
-
- ret = drm_fb_helper_init(dev, &fbcon->helper);
- if (ret)
- goto free;
-
- if (preferred_bpp != 8 && preferred_bpp != 16 && preferred_bpp != 32) {
- if (drm->client.device.info.ram_size <= 32 * 1024 * 1024)
- preferred_bpp = 8;
- else
- if (drm->client.device.info.ram_size <= 64 * 1024 * 1024)
- preferred_bpp = 16;
- else
- preferred_bpp = 32;
- }
-
- /* disable all the possible outputs/crtcs before entering KMS mode */
- if (!drm_drv_uses_atomic_modeset(dev))
- drm_helper_disable_unused_functions(dev);
-
- ret = drm_fb_helper_initial_config(&fbcon->helper, preferred_bpp);
- if (ret)
- goto fini;
-
- if (fbcon->helper.info)
- fbcon->helper.info->pixmap.buf_align = 4;
- return 0;
-
-fini:
- drm_fb_helper_fini(&fbcon->helper);
-free:
- kfree(fbcon);
- drm->fbcon = NULL;
- return ret;
-}
-
-void
-nouveau_fbcon_fini(struct drm_device *dev)
-{
- struct nouveau_drm *drm = nouveau_drm(dev);
-
- if (!drm->fbcon)
- return;
-
- drm_kms_helper_poll_fini(dev);
- nouveau_fbcon_accel_fini(dev);
- nouveau_fbcon_destroy(dev, drm->fbcon);
- kfree(drm->fbcon);
- drm->fbcon = NULL;
-}
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/firmware.c b/drivers/gpu/drm/nouveau/nvkm/core/firmware.c
index fcf2a002f6cb..91fb494d4009 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/firmware.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/firmware.c
@@ -151,6 +151,9 @@ nvkm_firmware_mem_page(struct nvkm_memory *memory)
static enum nvkm_memory_target
nvkm_firmware_mem_target(struct nvkm_memory *memory)
{
+ if (nvkm_firmware_mem(memory)->device->func->tegra)
+ return NVKM_MEM_TARGET_NCOH;
+
return NVKM_MEM_TARGET_HOST;
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index 364fea320cb3..1c81e5b34d29 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -2405,7 +2405,7 @@ nv162_chipset = {
.bus = { 0x00000001, gf100_bus_new },
.devinit = { 0x00000001, tu102_devinit_new },
.fault = { 0x00000001, tu102_fault_new },
- .fb = { 0x00000001, gv100_fb_new },
+ .fb = { 0x00000001, tu102_fb_new },
.fuse = { 0x00000001, gm107_fuse_new },
.gpio = { 0x00000001, gk104_gpio_new },
.gsp = { 0x00000001, gv100_gsp_new },
@@ -2440,7 +2440,7 @@ nv164_chipset = {
.bus = { 0x00000001, gf100_bus_new },
.devinit = { 0x00000001, tu102_devinit_new },
.fault = { 0x00000001, tu102_fault_new },
- .fb = { 0x00000001, gv100_fb_new },
+ .fb = { 0x00000001, tu102_fb_new },
.fuse = { 0x00000001, gm107_fuse_new },
.gpio = { 0x00000001, gk104_gpio_new },
.gsp = { 0x00000001, gv100_gsp_new },
@@ -2475,7 +2475,7 @@ nv166_chipset = {
.bus = { 0x00000001, gf100_bus_new },
.devinit = { 0x00000001, tu102_devinit_new },
.fault = { 0x00000001, tu102_fault_new },
- .fb = { 0x00000001, gv100_fb_new },
+ .fb = { 0x00000001, tu102_fb_new },
.fuse = { 0x00000001, gm107_fuse_new },
.gpio = { 0x00000001, gk104_gpio_new },
.gsp = { 0x00000001, gv100_gsp_new },
@@ -2510,7 +2510,7 @@ nv167_chipset = {
.bus = { 0x00000001, gf100_bus_new },
.devinit = { 0x00000001, tu102_devinit_new },
.fault = { 0x00000001, tu102_fault_new },
- .fb = { 0x00000001, gv100_fb_new },
+ .fb = { 0x00000001, tu102_fb_new },
.fuse = { 0x00000001, gm107_fuse_new },
.gpio = { 0x00000001, gk104_gpio_new },
.gsp = { 0x00000001, gv100_gsp_new },
@@ -2545,7 +2545,7 @@ nv168_chipset = {
.bus = { 0x00000001, gf100_bus_new },
.devinit = { 0x00000001, tu102_devinit_new },
.fault = { 0x00000001, tu102_fault_new },
- .fb = { 0x00000001, gv100_fb_new },
+ .fb = { 0x00000001, tu102_fb_new },
.fuse = { 0x00000001, gm107_fuse_new },
.gpio = { 0x00000001, gk104_gpio_new },
.gsp = { 0x00000001, gv100_gsp_new },
diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c b/drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c
index 393ade9f7e6c..b7da3ab44c27 100644
--- a/drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c
@@ -48,6 +48,16 @@ gm200_flcn_pio_dmem_rd(struct nvkm_falcon *falcon, u8 port, const u8 *img, int l
img += 4;
len -= 4;
}
+
+ /* Sigh. Tegra PMU FW's init message... */
+ if (len) {
+ u32 data = nvkm_falcon_rd32(falcon, 0x1c4 + (port * 8));
+
+ while (len--) {
+ *(u8 *)img++ = data & 0xff;
+ data >>= 8;
+ }
+ }
}
static void
@@ -64,6 +74,8 @@ gm200_flcn_pio_dmem_wr(struct nvkm_falcon *falcon, u8 port, const u8 *img, int l
img += 4;
len -= 4;
}
+
+ WARN_ON(len);
}
static void
@@ -74,7 +86,7 @@ gm200_flcn_pio_dmem_wr_init(struct nvkm_falcon *falcon, u8 port, bool sec, u32 d
const struct nvkm_falcon_func_pio
gm200_flcn_dmem_pio = {
- .min = 4,
+ .min = 1,
.max = 0x100,
.wr_init = gm200_flcn_pio_dmem_wr_init,
.wr = gm200_flcn_pio_dmem_wr,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c
index 634f64f88fc8..81a1ad2c88a7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c
@@ -65,10 +65,33 @@ tu102_devinit_pll_set(struct nvkm_devinit *init, u32 type, u32 freq)
return ret;
}
+static int
+tu102_devinit_wait(struct nvkm_device *device)
+{
+ unsigned timeout = 50 + 2000;
+
+ do {
+ if (nvkm_rd32(device, 0x118128) & 0x00000001) {
+ if ((nvkm_rd32(device, 0x118234) & 0x000000ff) == 0xff)
+ return 0;
+ }
+
+ usleep_range(1000, 2000);
+ } while (timeout--);
+
+ return -ETIMEDOUT;
+}
+
int
tu102_devinit_post(struct nvkm_devinit *base, bool post)
{
struct nv50_devinit *init = nv50_devinit(base);
+ int ret;
+
+ ret = tu102_devinit_wait(init->base.subdev.device);
+ if (ret)
+ return ret;
+
gm200_devinit_preos(init, post);
return 0;
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild
index 5d0bab8ecb43..6ba5120a2ebe 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild
@@ -32,6 +32,7 @@ nvkm-y += nvkm/subdev/fb/gp100.o
nvkm-y += nvkm/subdev/fb/gp102.o
nvkm-y += nvkm/subdev/fb/gp10b.o
nvkm-y += nvkm/subdev/fb/gv100.o
+nvkm-y += nvkm/subdev/fb/tu102.o
nvkm-y += nvkm/subdev/fb/ga100.o
nvkm-y += nvkm/subdev/fb/ga102.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga102.c
index 8b7c8ea5e8a5..5a21b0ae4595 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga102.c
@@ -40,12 +40,6 @@ ga102_fb_vpr_scrub(struct nvkm_fb *fb)
return ret;
}
-static bool
-ga102_fb_vpr_scrub_required(struct nvkm_fb *fb)
-{
- return (nvkm_rd32(fb->subdev.device, 0x1fa80c) & 0x00000010) != 0;
-}
-
static const struct nvkm_fb_func
ga102_fb = {
.dtor = gf100_fb_dtor,
@@ -56,7 +50,7 @@ ga102_fb = {
.sysmem.flush_page_init = gf100_fb_sysmem_flush_page_init,
.ram_new = ga102_ram_new,
.default_bigpage = 16,
- .vpr.scrub_required = ga102_fb_vpr_scrub_required,
+ .vpr.scrub_required = tu102_fb_vpr_scrub_required,
.vpr.scrub = ga102_fb_vpr_scrub,
};
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c
index 1f0126437c1a..0e3c0a8f5d71 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c
@@ -49,8 +49,3 @@ gv100_fb_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, s
}
MODULE_FIRMWARE("nvidia/gv100/nvdec/scrubber.bin");
-MODULE_FIRMWARE("nvidia/tu102/nvdec/scrubber.bin");
-MODULE_FIRMWARE("nvidia/tu104/nvdec/scrubber.bin");
-MODULE_FIRMWARE("nvidia/tu106/nvdec/scrubber.bin");
-MODULE_FIRMWARE("nvidia/tu116/nvdec/scrubber.bin");
-MODULE_FIRMWARE("nvidia/tu117/nvdec/scrubber.bin");
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h
index ac03eac0f261..f517751f94ac 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h
@@ -89,4 +89,6 @@ bool gp102_fb_vpr_scrub_required(struct nvkm_fb *);
int gp102_fb_vpr_scrub(struct nvkm_fb *);
int gv100_fb_init_page(struct nvkm_fb *);
+
+bool tu102_fb_vpr_scrub_required(struct nvkm_fb *);
#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/tu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/tu102.c
new file mode 100644
index 000000000000..be82af0364ee
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/tu102.c
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "gf100.h"
+#include "ram.h"
+
+bool
+tu102_fb_vpr_scrub_required(struct nvkm_fb *fb)
+{
+ return (nvkm_rd32(fb->subdev.device, 0x1fa80c) & 0x00000010) != 0;
+}
+
+static const struct nvkm_fb_func
+tu102_fb = {
+ .dtor = gf100_fb_dtor,
+ .oneinit = gf100_fb_oneinit,
+ .init = gm200_fb_init,
+ .init_page = gv100_fb_init_page,
+ .init_unkn = gp100_fb_init_unkn,
+ .sysmem.flush_page_init = gf100_fb_sysmem_flush_page_init,
+ .vpr.scrub_required = tu102_fb_vpr_scrub_required,
+ .vpr.scrub = gp102_fb_vpr_scrub,
+ .ram_new = gp100_ram_new,
+ .default_bigpage = 16,
+};
+
+int
+tu102_fb_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, struct nvkm_fb **pfb)
+{
+ return gp102_fb_new_(&tu102_fb, device, type, inst, pfb);
+}
+
+MODULE_FIRMWARE("nvidia/tu102/nvdec/scrubber.bin");
+MODULE_FIRMWARE("nvidia/tu104/nvdec/scrubber.bin");
+MODULE_FIRMWARE("nvidia/tu106/nvdec/scrubber.bin");
+MODULE_FIRMWARE("nvidia/tu116/nvdec/scrubber.bin");
+MODULE_FIRMWARE("nvidia/tu117/nvdec/scrubber.bin");
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c
index a72403777329..2ed04da3621d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c
@@ -225,7 +225,7 @@ gm20b_pmu_init(struct nvkm_pmu *pmu)
pmu->initmsg_received = false;
- nvkm_falcon_load_dmem(falcon, &args, addr_args, sizeof(args), 0);
+ nvkm_falcon_pio_wr(falcon, (u8 *)&args, 0, 0, DMEM, addr_args, sizeof(args), 0, false);
nvkm_falcon_start(falcon);
return 0;
}
diff --git a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c
index 857a2f0420d7..c924f1124ebc 100644
--- a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c
+++ b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c
@@ -1193,14 +1193,11 @@ static int boe_panel_enter_sleep_mode(struct boe_panel *boe)
return 0;
}
-static int boe_panel_unprepare(struct drm_panel *panel)
+static int boe_panel_disable(struct drm_panel *panel)
{
struct boe_panel *boe = to_boe_panel(panel);
int ret;
- if (!boe->prepared)
- return 0;
-
ret = boe_panel_enter_sleep_mode(boe);
if (ret < 0) {
dev_err(panel->dev, "failed to set panel off: %d\n", ret);
@@ -1209,6 +1206,16 @@ static int boe_panel_unprepare(struct drm_panel *panel)
msleep(150);
+ return 0;
+}
+
+static int boe_panel_unprepare(struct drm_panel *panel)
+{
+ struct boe_panel *boe = to_boe_panel(panel);
+
+ if (!boe->prepared)
+ return 0;
+
if (boe->desc->discharge_on_disable) {
regulator_disable(boe->avee);
regulator_disable(boe->avdd);
@@ -1528,6 +1535,7 @@ static enum drm_panel_orientation boe_panel_get_orientation(struct drm_panel *pa
}
static const struct drm_panel_funcs boe_panel_funcs = {
+ .disable = boe_panel_disable,
.unprepare = boe_panel_unprepare,
.prepare = boe_panel_prepare,
.enable = boe_panel_enable,
diff --git a/drivers/gpu/drm/panfrost/Kconfig b/drivers/gpu/drm/panfrost/Kconfig
index 079600328be1..e6403a9d66ad 100644
--- a/drivers/gpu/drm/panfrost/Kconfig
+++ b/drivers/gpu/drm/panfrost/Kconfig
@@ -3,7 +3,8 @@
config DRM_PANFROST
tristate "Panfrost (DRM support for ARM Mali Midgard/Bifrost GPUs)"
depends on DRM
- depends on ARM || ARM64 || (COMPILE_TEST && !GENERIC_ATOMIC64)
+ depends on ARM || ARM64 || COMPILE_TEST
+ depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE
depends on MMU
select DRM_SCHED
select IOMMU_SUPPORT
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index 2fa5afe21288..919e6cc04982 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -82,6 +82,7 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data,
struct panfrost_gem_object *bo;
struct drm_panfrost_create_bo *args = data;
struct panfrost_gem_mapping *mapping;
+ int ret;
if (!args->size || args->pad ||
(args->flags & ~(PANFROST_BO_NOEXEC | PANFROST_BO_HEAP)))
@@ -92,21 +93,29 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data,
!(args->flags & PANFROST_BO_NOEXEC))
return -EINVAL;
- bo = panfrost_gem_create_with_handle(file, dev, args->size, args->flags,
- &args->handle);
+ bo = panfrost_gem_create(dev, args->size, args->flags);
if (IS_ERR(bo))
return PTR_ERR(bo);
+ ret = drm_gem_handle_create(file, &bo->base.base, &args->handle);
+ if (ret)
+ goto out;
+
mapping = panfrost_gem_mapping_get(bo, priv);
- if (!mapping) {
- drm_gem_object_put(&bo->base.base);
- return -EINVAL;
+ if (mapping) {
+ args->offset = mapping->mmnode.start << PAGE_SHIFT;
+ panfrost_gem_mapping_put(mapping);
+ } else {
+ /* This can only happen if the handle from
+ * drm_gem_handle_create() has already been guessed and freed
+ * by user space
+ */
+ ret = -EINVAL;
}
- args->offset = mapping->mmnode.start << PAGE_SHIFT;
- panfrost_gem_mapping_put(mapping);
-
- return 0;
+out:
+ drm_gem_object_put(&bo->base.base);
+ return ret;
}
/**
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c
index 293e799e2fe8..3c812fbd126f 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gem.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gem.c
@@ -235,12 +235,8 @@ struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t
}
struct panfrost_gem_object *
-panfrost_gem_create_with_handle(struct drm_file *file_priv,
- struct drm_device *dev, size_t size,
- u32 flags,
- uint32_t *handle)
+panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags)
{
- int ret;
struct drm_gem_shmem_object *shmem;
struct panfrost_gem_object *bo;
@@ -256,16 +252,6 @@ panfrost_gem_create_with_handle(struct drm_file *file_priv,
bo->noexec = !!(flags & PANFROST_BO_NOEXEC);
bo->is_heap = !!(flags & PANFROST_BO_HEAP);
- /*
- * Allocate an id of idr table where the obj is registered
- * and handle has the id what user can see.
- */
- ret = drm_gem_handle_create(file_priv, &shmem->base, handle);
- /* drop reference from allocate - handle holds it now. */
- drm_gem_object_put(&shmem->base);
- if (ret)
- return ERR_PTR(ret);
-
return bo;
}
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h
index 8088d5fd8480..ad2877eeeccd 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gem.h
+++ b/drivers/gpu/drm/panfrost/panfrost_gem.h
@@ -69,10 +69,7 @@ panfrost_gem_prime_import_sg_table(struct drm_device *dev,
struct sg_table *sgt);
struct panfrost_gem_object *
-panfrost_gem_create_with_handle(struct drm_file *file_priv,
- struct drm_device *dev, size_t size,
- u32 flags,
- uint32_t *handle);
+panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags);
int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_priv);
void panfrost_gem_close(struct drm_gem_object *obj,
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index fe09e5be79bd..15d04a0ec623 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -81,7 +81,7 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
init_completion(&entity->entity_idle);
/* We start in an idle state. */
- complete(&entity->entity_idle);
+ complete_all(&entity->entity_idle);
spin_lock_init(&entity->rq_lock);
spsc_queue_init(&entity->job_queue);
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 31f3a1267be4..fd22d753b4ed 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -987,7 +987,7 @@ static int drm_sched_main(void *param)
sched_job = drm_sched_entity_pop_job(entity);
if (!sched_job) {
- complete(&entity->entity_idle);
+ complete_all(&entity->entity_idle);
continue;
}
@@ -998,7 +998,7 @@ static int drm_sched_main(void *param)
trace_drm_run_job(sched_job, entity);
fence = sched->ops->run_job(sched_job);
- complete(&entity->entity_idle);
+ complete_all(&entity->entity_idle);
drm_sched_fence_scheduled(s_fence);
if (!IS_ERR_OR_NULL(fence)) {
diff --git a/drivers/gpu/drm/solomon/ssd130x.c b/drivers/gpu/drm/solomon/ssd130x.c
index 53464afc2b9a..91f69e62430b 100644
--- a/drivers/gpu/drm/solomon/ssd130x.c
+++ b/drivers/gpu/drm/solomon/ssd130x.c
@@ -656,18 +656,8 @@ static const struct drm_crtc_helper_funcs ssd130x_crtc_helper_funcs = {
.atomic_check = drm_crtc_helper_atomic_check,
};
-static void ssd130x_crtc_reset(struct drm_crtc *crtc)
-{
- struct drm_device *drm = crtc->dev;
- struct ssd130x_device *ssd130x = drm_to_ssd130x(drm);
-
- ssd130x_init(ssd130x);
-
- drm_atomic_helper_crtc_reset(crtc);
-}
-
static const struct drm_crtc_funcs ssd130x_crtc_funcs = {
- .reset = ssd130x_crtc_reset,
+ .reset = drm_atomic_helper_crtc_reset,
.destroy = drm_crtc_cleanup,
.set_config = drm_atomic_helper_set_config,
.page_flip = drm_atomic_helper_page_flip,
@@ -686,6 +676,12 @@ static void ssd130x_encoder_helper_atomic_enable(struct drm_encoder *encoder,
if (ret)
return;
+ ret = ssd130x_init(ssd130x);
+ if (ret) {
+ ssd130x_power_off(ssd130x);
+ return;
+ }
+
ssd130x_write_cmd(ssd130x, 1, SSD130X_DISPLAY_ON);
backlight_enable(ssd130x->bl_dev);
diff --git a/drivers/gpu/drm/tests/Makefile b/drivers/gpu/drm/tests/Makefile
index b29ef1085cad..f896ef85c2f2 100644
--- a/drivers/gpu/drm/tests/Makefile
+++ b/drivers/gpu/drm/tests/Makefile
@@ -12,3 +12,5 @@ obj-$(CONFIG_DRM_KUNIT_TEST) += \
drm_mm_test.o \
drm_plane_helper_test.o \
drm_rect_test.o
+
+CFLAGS_drm_mm_test.o := $(DISABLE_STRUCTLEAK_PLUGIN)
diff --git a/drivers/gpu/drm/tests/drm_mm_test.c b/drivers/gpu/drm/tests/drm_mm_test.c
index 89f12d3b4a21..186b28dc7038 100644
--- a/drivers/gpu/drm/tests/drm_mm_test.c
+++ b/drivers/gpu/drm/tests/drm_mm_test.c
@@ -298,9 +298,9 @@ static bool expect_reserve_fail(struct kunit *test, struct drm_mm *mm, struct dr
return false;
}
-static bool check_reserve_boundaries(struct kunit *test, struct drm_mm *mm,
- unsigned int count,
- u64 size)
+static bool noinline_for_stack check_reserve_boundaries(struct kunit *test, struct drm_mm *mm,
+ unsigned int count,
+ u64 size)
{
const struct boundary {
u64 start, size;
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index ba3aa0a0fc43..da5493f789df 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -173,7 +173,7 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
clear = src_iter->ops->maps_tt && (!ttm || !ttm_tt_is_populated(ttm));
if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC)))
- ttm_move_memcpy(clear, ttm->num_pages, dst_iter, src_iter);
+ ttm_move_memcpy(clear, PFN_UP(dst_mem->size), dst_iter, src_iter);
if (!src_iter->ops->maps_tt)
ttm_kmap_iter_linear_io_fini(&_src_iter.io, bdev, src_mem);
diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
index 43d9b3a6a352..c5947ed8cc81 100644
--- a/drivers/gpu/drm/vc4/vc4_bo.c
+++ b/drivers/gpu/drm/vc4/vc4_bo.c
@@ -179,6 +179,7 @@ static void vc4_bo_destroy(struct vc4_bo *bo)
bo->validated_shader = NULL;
}
+ mutex_destroy(&bo->madv_lock);
drm_gem_dma_free(&bo->base);
}
@@ -394,7 +395,6 @@ struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size)
{
struct vc4_dev *vc4 = to_vc4_dev(dev);
struct vc4_bo *bo;
- int ret;
if (WARN_ON_ONCE(vc4->is_vc5))
return ERR_PTR(-ENODEV);
@@ -406,9 +406,7 @@ struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size)
bo->madv = VC4_MADV_WILLNEED;
refcount_set(&bo->usecnt, 0);
- ret = drmm_mutex_init(dev, &bo->madv_lock);
- if (ret)
- return ERR_PTR(ret);
+ mutex_init(&bo->madv_lock);
mutex_lock(&vc4->bo_lock);
bo->label = VC4_BO_TYPE_KERNEL;
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index 0108613e79d5..7258975331ca 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -711,7 +711,7 @@ static int vc4_crtc_atomic_check(struct drm_crtc *crtc,
struct vc4_encoder *vc4_encoder = to_vc4_encoder(encoder);
if (vc4_encoder->type == VC4_ENCODER_TYPE_HDMI0) {
- vc4_state->hvs_load = max(mode->clock * mode->hdisplay / mode->htotal + 1000,
+ vc4_state->hvs_load = max(mode->clock * mode->hdisplay / mode->htotal + 8000,
mode->clock * 9 / 10) * 1000;
} else {
vc4_state->hvs_load = mode->clock * 1000;
diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
index 12a00d644b61..7546103f1499 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
@@ -97,6 +97,10 @@
#define VC5_HDMI_GCP_WORD_1_GCP_SUBPACKET_BYTE_1_SHIFT 8
#define VC5_HDMI_GCP_WORD_1_GCP_SUBPACKET_BYTE_1_MASK VC4_MASK(15, 8)
+#define VC5_HDMI_GCP_WORD_1_GCP_SUBPACKET_BYTE_0_MASK VC4_MASK(7, 0)
+#define VC5_HDMI_GCP_WORD_1_GCP_SUBPACKET_BYTE_0_SET_AVMUTE BIT(0)
+#define VC5_HDMI_GCP_WORD_1_GCP_SUBPACKET_BYTE_0_CLEAR_AVMUTE BIT(4)
+
# define VC4_HD_M_SW_RST BIT(2)
# define VC4_HD_M_ENABLE BIT(0)
@@ -1306,7 +1310,6 @@ static void vc5_hdmi_set_timings(struct vc4_hdmi *vc4_hdmi,
VC4_HDMI_VERTB_VBP));
unsigned long flags;
unsigned char gcp;
- bool gcp_en;
u32 reg;
int idx;
@@ -1341,16 +1344,13 @@ static void vc5_hdmi_set_timings(struct vc4_hdmi *vc4_hdmi,
switch (vc4_state->output_bpc) {
case 12:
gcp = 6;
- gcp_en = true;
break;
case 10:
gcp = 5;
- gcp_en = true;
break;
case 8:
default:
- gcp = 4;
- gcp_en = false;
+ gcp = 0;
break;
}
@@ -1359,8 +1359,7 @@ static void vc5_hdmi_set_timings(struct vc4_hdmi *vc4_hdmi,
* doesn't signal in GCP.
*/
if (vc4_state->output_format == VC4_HDMI_OUTPUT_YUV422) {
- gcp = 4;
- gcp_en = false;
+ gcp = 0;
}
reg = HDMI_READ(HDMI_DEEP_COLOR_CONFIG_1);
@@ -1373,11 +1372,12 @@ static void vc5_hdmi_set_timings(struct vc4_hdmi *vc4_hdmi,
reg = HDMI_READ(HDMI_GCP_WORD_1);
reg &= ~VC5_HDMI_GCP_WORD_1_GCP_SUBPACKET_BYTE_1_MASK;
reg |= VC4_SET_FIELD(gcp, VC5_HDMI_GCP_WORD_1_GCP_SUBPACKET_BYTE_1);
+ reg &= ~VC5_HDMI_GCP_WORD_1_GCP_SUBPACKET_BYTE_0_MASK;
+ reg |= VC5_HDMI_GCP_WORD_1_GCP_SUBPACKET_BYTE_0_CLEAR_AVMUTE;
HDMI_WRITE(HDMI_GCP_WORD_1, reg);
reg = HDMI_READ(HDMI_GCP_CONFIG);
- reg &= ~VC5_HDMI_GCP_CONFIG_GCP_ENABLE;
- reg |= gcp_en ? VC5_HDMI_GCP_CONFIG_GCP_ENABLE : 0;
+ reg |= VC5_HDMI_GCP_CONFIG_GCP_ENABLE;
HDMI_WRITE(HDMI_GCP_CONFIG, reg);
reg = HDMI_READ(HDMI_MISC_CONTROL);
@@ -3018,7 +3018,8 @@ static int vc4_hdmi_cec_init(struct vc4_hdmi *vc4_hdmi)
}
vc4_hdmi->cec_adap = cec_allocate_adapter(&vc4_hdmi_cec_adap_ops,
- vc4_hdmi, "vc4",
+ vc4_hdmi,
+ vc4_hdmi->variant->card_name,
CEC_CAP_DEFAULTS |
CEC_CAP_CONNECTOR_INFO, 1);
ret = PTR_ERR_OR_ZERO(vc4_hdmi->cec_adap);
diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index 8b92a45a3c89..bd5acc4a8687 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -340,7 +340,7 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
{
struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
struct drm_framebuffer *fb = state->fb;
- struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, 0);
+ struct drm_gem_dma_object *bo;
int num_planes = fb->format->num_planes;
struct drm_crtc_state *crtc_state;
u32 h_subsample = fb->format->hsub;
@@ -359,8 +359,10 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
if (ret)
return ret;
- for (i = 0; i < num_planes; i++)
+ for (i = 0; i < num_planes; i++) {
+ bo = drm_fb_dma_get_gem_obj(fb, i);
vc4_state->offsets[i] = bo->dma_addr + fb->offsets[i];
+ }
/*
* We don't support subpixel source positioning for scaling,
diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
index 5d05093014ac..da45215a933d 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
@@ -126,7 +126,6 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
void __user *user_bo_handles = NULL;
struct virtio_gpu_object_array *buflist = NULL;
struct sync_file *sync_file;
- int in_fence_fd = exbuf->fence_fd;
int out_fence_fd = -1;
void *buf;
uint64_t fence_ctx;
@@ -152,13 +151,11 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
ring_idx = exbuf->ring_idx;
}
- exbuf->fence_fd = -1;
-
virtio_gpu_create_context(dev, file);
if (exbuf->flags & VIRTGPU_EXECBUF_FENCE_FD_IN) {
struct dma_fence *in_fence;
- in_fence = sync_file_get_fence(in_fence_fd);
+ in_fence = sync_file_get_fence(exbuf->fence_fd);
if (!in_fence)
return -EINVAL;
@@ -358,10 +355,18 @@ static int virtio_gpu_resource_create_ioctl(struct drm_device *dev, void *data,
drm_gem_object_release(obj);
return ret;
}
- drm_gem_object_put(obj);
rc->res_handle = qobj->hw_res_handle; /* similiar to a VM address */
rc->bo_handle = handle;
+
+ /*
+ * The handle owns the reference now. But we must drop our
+ * remaining reference *after* we no longer need to dereference
+ * the obj. Otherwise userspace could guess the handle and
+ * race closing it from another thread.
+ */
+ drm_gem_object_put(obj);
+
return 0;
}
@@ -723,11 +728,18 @@ static int virtio_gpu_resource_create_blob_ioctl(struct drm_device *dev,
drm_gem_object_release(obj);
return ret;
}
- drm_gem_object_put(obj);
rc_blob->res_handle = bo->hw_res_handle;
rc_blob->bo_handle = handle;
+ /*
+ * The handle owns the reference now. But we must drop our
+ * remaining reference *after* we no longer need to dereference
+ * the obj. Otherwise userspace could guess the handle and
+ * race closing it from another thread.
+ */
+ drm_gem_object_put(obj);
+
return 0;
}
diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c
index 8d7728181de0..c7e74cf13022 100644
--- a/drivers/gpu/drm/virtio/virtgpu_object.c
+++ b/drivers/gpu/drm/virtio/virtgpu_object.c
@@ -184,7 +184,7 @@ int virtio_gpu_object_create(struct virtio_gpu_device *vgdev,
struct virtio_gpu_object_array *objs = NULL;
struct drm_gem_shmem_object *shmem_obj;
struct virtio_gpu_object *bo;
- struct virtio_gpu_mem_entry *ents;
+ struct virtio_gpu_mem_entry *ents = NULL;
unsigned int nents;
int ret;
@@ -210,7 +210,7 @@ int virtio_gpu_object_create(struct virtio_gpu_device *vgdev,
ret = -ENOMEM;
objs = virtio_gpu_array_alloc(1);
if (!objs)
- goto err_put_id;
+ goto err_free_entry;
virtio_gpu_array_add_obj(objs, &bo->base.base);
ret = virtio_gpu_array_lock_resv(objs);
@@ -239,6 +239,8 @@ int virtio_gpu_object_create(struct virtio_gpu_device *vgdev,
err_put_objs:
virtio_gpu_array_put_free(objs);
+err_free_entry:
+ kvfree(ents);
err_put_id:
virtio_gpu_resource_id_put(vgdev, bo->hw_res_handle);
err_free_gem:
diff --git a/drivers/gpu/drm/vmwgfx/ttm_object.c b/drivers/gpu/drm/vmwgfx/ttm_object.c
index 932b125ebf3d..ddf8373c1d77 100644
--- a/drivers/gpu/drm/vmwgfx/ttm_object.c
+++ b/drivers/gpu/drm/vmwgfx/ttm_object.c
@@ -254,40 +254,6 @@ void ttm_base_object_unref(struct ttm_base_object **p_base)
kref_put(&base->refcount, ttm_release_base);
}
-/**
- * ttm_base_object_noref_lookup - look up a base object without reference
- * @tfile: The struct ttm_object_file the object is registered with.
- * @key: The object handle.
- *
- * This function looks up a ttm base object and returns a pointer to it
- * without refcounting the pointer. The returned pointer is only valid
- * until ttm_base_object_noref_release() is called, and the object
- * pointed to by the returned pointer may be doomed. Any persistent usage
- * of the object requires a refcount to be taken using kref_get_unless_zero().
- * Iff this function returns successfully it needs to be paired with
- * ttm_base_object_noref_release() and no sleeping- or scheduling functions
- * may be called inbetween these function callse.
- *
- * Return: A pointer to the object if successful or NULL otherwise.
- */
-struct ttm_base_object *
-ttm_base_object_noref_lookup(struct ttm_object_file *tfile, uint64_t key)
-{
- struct vmwgfx_hash_item *hash;
- int ret;
-
- rcu_read_lock();
- ret = ttm_tfile_find_ref_rcu(tfile, key, &hash);
- if (ret) {
- rcu_read_unlock();
- return NULL;
- }
-
- __release(RCU);
- return hlist_entry(hash, struct ttm_ref_object, hash)->obj;
-}
-EXPORT_SYMBOL(ttm_base_object_noref_lookup);
-
struct ttm_base_object *ttm_base_object_lookup(struct ttm_object_file *tfile,
uint64_t key)
{
@@ -295,15 +261,16 @@ struct ttm_base_object *ttm_base_object_lookup(struct ttm_object_file *tfile,
struct vmwgfx_hash_item *hash;
int ret;
- rcu_read_lock();
- ret = ttm_tfile_find_ref_rcu(tfile, key, &hash);
+ spin_lock(&tfile->lock);
+ ret = ttm_tfile_find_ref(tfile, key, &hash);
if (likely(ret == 0)) {
base = hlist_entry(hash, struct ttm_ref_object, hash)->obj;
if (!kref_get_unless_zero(&base->refcount))
base = NULL;
}
- rcu_read_unlock();
+ spin_unlock(&tfile->lock);
+
return base;
}
diff --git a/drivers/gpu/drm/vmwgfx/ttm_object.h b/drivers/gpu/drm/vmwgfx/ttm_object.h
index f0ebbe340ad6..8098a3846bae 100644
--- a/drivers/gpu/drm/vmwgfx/ttm_object.h
+++ b/drivers/gpu/drm/vmwgfx/ttm_object.h
@@ -307,18 +307,4 @@ extern int ttm_prime_handle_to_fd(struct ttm_object_file *tfile,
#define ttm_prime_object_kfree(__obj, __prime) \
kfree_rcu(__obj, __prime.base.rhead)
-struct ttm_base_object *
-ttm_base_object_noref_lookup(struct ttm_object_file *tfile, uint64_t key);
-
-/**
- * ttm_base_object_noref_release - release a base object pointer looked up
- * without reference
- *
- * Releases a base object pointer looked up with ttm_base_object_noref_lookup().
- */
-static inline void ttm_base_object_noref_release(void)
-{
- __acquire(RCU);
- rcu_read_unlock();
-}
#endif
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
index 321c551784a1..4dcf2eb7aa80 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
@@ -462,6 +462,9 @@ int vmw_bo_create(struct vmw_private *vmw,
return -ENOMEM;
}
+ /*
+ * vmw_bo_init will delete the *p_bo object if it fails
+ */
ret = vmw_bo_init(vmw, *p_bo, size,
placement, interruptible, pin,
bo_free);
@@ -470,7 +473,6 @@ int vmw_bo_create(struct vmw_private *vmw,
return ret;
out_error:
- kfree(*p_bo);
*p_bo = NULL;
return ret;
}
@@ -596,6 +598,7 @@ static int vmw_user_bo_synccpu_release(struct drm_file *filp,
ttm_bo_put(&vmw_bo->base);
}
+ drm_gem_object_put(&vmw_bo->base.base);
return ret;
}
@@ -636,6 +639,7 @@ int vmw_user_bo_synccpu_ioctl(struct drm_device *dev, void *data,
ret = vmw_user_bo_synccpu_grab(vbo, arg->flags);
vmw_bo_unreference(&vbo);
+ drm_gem_object_put(&vbo->base.base);
if (unlikely(ret != 0)) {
if (ret == -ERESTARTSYS || ret == -EBUSY)
return -EBUSY;
@@ -693,7 +697,7 @@ int vmw_bo_unref_ioctl(struct drm_device *dev, void *data,
* struct vmw_buffer_object should be placed.
* Return: Zero on success, Negative error code on error.
*
- * The vmw buffer object pointer will be refcounted.
+ * The vmw buffer object pointer will be refcounted (both ttm and gem)
*/
int vmw_user_bo_lookup(struct drm_file *filp,
uint32_t handle,
@@ -710,50 +714,11 @@ int vmw_user_bo_lookup(struct drm_file *filp,
*out = gem_to_vmw_bo(gobj);
ttm_bo_get(&(*out)->base);
- drm_gem_object_put(gobj);
return 0;
}
/**
- * vmw_user_bo_noref_lookup - Look up a vmw user buffer object without reference
- * @filp: The TTM object file the handle is registered with.
- * @handle: The user buffer object handle.
- *
- * This function looks up a struct vmw_bo and returns a pointer to the
- * struct vmw_buffer_object it derives from without refcounting the pointer.
- * The returned pointer is only valid until vmw_user_bo_noref_release() is
- * called, and the object pointed to by the returned pointer may be doomed.
- * Any persistent usage of the object requires a refcount to be taken using
- * ttm_bo_reference_unless_doomed(). Iff this function returns successfully it
- * needs to be paired with vmw_user_bo_noref_release() and no sleeping-
- * or scheduling functions may be called in between these function calls.
- *
- * Return: A struct vmw_buffer_object pointer if successful or negative
- * error pointer on failure.
- */
-struct vmw_buffer_object *
-vmw_user_bo_noref_lookup(struct drm_file *filp, u32 handle)
-{
- struct vmw_buffer_object *vmw_bo;
- struct ttm_buffer_object *bo;
- struct drm_gem_object *gobj = drm_gem_object_lookup(filp, handle);
-
- if (!gobj) {
- DRM_ERROR("Invalid buffer object handle 0x%08lx.\n",
- (unsigned long)handle);
- return ERR_PTR(-ESRCH);
- }
- vmw_bo = gem_to_vmw_bo(gobj);
- bo = ttm_bo_get_unless_zero(&vmw_bo->base);
- vmw_bo = vmw_buffer_object(bo);
- drm_gem_object_put(gobj);
-
- return vmw_bo;
-}
-
-
-/**
* vmw_bo_fence_single - Utility function to fence a single TTM buffer
* object without unreserving it.
*
@@ -829,7 +794,8 @@ int vmw_dumb_create(struct drm_file *file_priv,
ret = vmw_gem_object_create_with_handle(dev_priv, file_priv,
args->size, &args->handle,
&vbo);
-
+ /* drop reference from allocate - handle holds it now */
+ drm_gem_object_put(&vbo->base.base);
return ret;
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index b062b020b378..5acbf5849b27 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -830,12 +830,7 @@ extern int vmw_user_resource_lookup_handle(
uint32_t handle,
const struct vmw_user_resource_conv *converter,
struct vmw_resource **p_res);
-extern struct vmw_resource *
-vmw_user_resource_noref_lookup_handle(struct vmw_private *dev_priv,
- struct ttm_object_file *tfile,
- uint32_t handle,
- const struct vmw_user_resource_conv *
- converter);
+
extern int vmw_stream_claim_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
extern int vmw_stream_unref_ioctl(struct drm_device *dev, void *data,
@@ -875,15 +870,6 @@ static inline bool vmw_resource_mob_attached(const struct vmw_resource *res)
}
/**
- * vmw_user_resource_noref_release - release a user resource pointer looked up
- * without reference
- */
-static inline void vmw_user_resource_noref_release(void)
-{
- ttm_base_object_noref_release();
-}
-
-/**
* Buffer object helper functions - vmwgfx_bo.c
*/
extern int vmw_bo_pin_in_placement(struct vmw_private *vmw_priv,
@@ -934,8 +920,6 @@ extern void vmw_bo_unmap(struct vmw_buffer_object *vbo);
extern void vmw_bo_move_notify(struct ttm_buffer_object *bo,
struct ttm_resource *mem);
extern void vmw_bo_swap_notify(struct ttm_buffer_object *bo);
-extern struct vmw_buffer_object *
-vmw_user_bo_noref_lookup(struct drm_file *filp, u32 handle);
/**
* vmw_bo_adjust_prio - Adjust the buffer object eviction priority
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index a5379f6fb5ab..c0686283ffd1 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -290,20 +290,26 @@ static void vmw_execbuf_rcache_update(struct vmw_res_cache_entry *rcache,
rcache->valid_handle = 0;
}
+enum vmw_val_add_flags {
+ vmw_val_add_flag_none = 0,
+ vmw_val_add_flag_noctx = 1 << 0,
+};
+
/**
- * vmw_execbuf_res_noref_val_add - Add a resource described by an unreferenced
- * rcu-protected pointer to the validation list.
+ * vmw_execbuf_res_val_add - Add a resource to the validation list.
*
* @sw_context: Pointer to the software context.
* @res: Unreferenced rcu-protected pointer to the resource.
* @dirty: Whether to change dirty status.
+ * @flags: specifies whether to use the context or not
*
* Returns: 0 on success. Negative error code on failure. Typical error codes
* are %-EINVAL on inconsistency and %-ESRCH if the resource was doomed.
*/
-static int vmw_execbuf_res_noref_val_add(struct vmw_sw_context *sw_context,
- struct vmw_resource *res,
- u32 dirty)
+static int vmw_execbuf_res_val_add(struct vmw_sw_context *sw_context,
+ struct vmw_resource *res,
+ u32 dirty,
+ u32 flags)
{
struct vmw_private *dev_priv = res->dev_priv;
int ret;
@@ -318,24 +324,30 @@ static int vmw_execbuf_res_noref_val_add(struct vmw_sw_context *sw_context,
if (dirty)
vmw_validation_res_set_dirty(sw_context->ctx,
rcache->private, dirty);
- vmw_user_resource_noref_release();
return 0;
}
- priv_size = vmw_execbuf_res_size(dev_priv, res_type);
- ret = vmw_validation_add_resource(sw_context->ctx, res, priv_size,
- dirty, (void **)&ctx_info,
- &first_usage);
- vmw_user_resource_noref_release();
- if (ret)
- return ret;
+ if ((flags & vmw_val_add_flag_noctx) != 0) {
+ ret = vmw_validation_add_resource(sw_context->ctx, res, 0, dirty,
+ (void **)&ctx_info, NULL);
+ if (ret)
+ return ret;
- if (priv_size && first_usage) {
- ret = vmw_cmd_ctx_first_setup(dev_priv, sw_context, res,
- ctx_info);
- if (ret) {
- VMW_DEBUG_USER("Failed first usage context setup.\n");
+ } else {
+ priv_size = vmw_execbuf_res_size(dev_priv, res_type);
+ ret = vmw_validation_add_resource(sw_context->ctx, res, priv_size,
+ dirty, (void **)&ctx_info,
+ &first_usage);
+ if (ret)
return ret;
+
+ if (priv_size && first_usage) {
+ ret = vmw_cmd_ctx_first_setup(dev_priv, sw_context, res,
+ ctx_info);
+ if (ret) {
+ VMW_DEBUG_USER("Failed first usage context setup.\n");
+ return ret;
+ }
}
}
@@ -344,43 +356,6 @@ static int vmw_execbuf_res_noref_val_add(struct vmw_sw_context *sw_context,
}
/**
- * vmw_execbuf_res_noctx_val_add - Add a non-context resource to the resource
- * validation list if it's not already on it
- *
- * @sw_context: Pointer to the software context.
- * @res: Pointer to the resource.
- * @dirty: Whether to change dirty status.
- *
- * Returns: Zero on success. Negative error code on failure.
- */
-static int vmw_execbuf_res_noctx_val_add(struct vmw_sw_context *sw_context,
- struct vmw_resource *res,
- u32 dirty)
-{
- struct vmw_res_cache_entry *rcache;
- enum vmw_res_type res_type = vmw_res_type(res);
- void *ptr;
- int ret;
-
- rcache = &sw_context->res_cache[res_type];
- if (likely(rcache->valid && rcache->res == res)) {
- if (dirty)
- vmw_validation_res_set_dirty(sw_context->ctx,
- rcache->private, dirty);
- return 0;
- }
-
- ret = vmw_validation_add_resource(sw_context->ctx, res, 0, dirty,
- &ptr, NULL);
- if (ret)
- return ret;
-
- vmw_execbuf_rcache_update(rcache, res, ptr);
-
- return 0;
-}
-
-/**
* vmw_view_res_val_add - Add a view and the surface it's pointing to to the
* validation list
*
@@ -398,13 +373,13 @@ static int vmw_view_res_val_add(struct vmw_sw_context *sw_context,
* First add the resource the view is pointing to, otherwise it may be
* swapped out when the view is validated.
*/
- ret = vmw_execbuf_res_noctx_val_add(sw_context, vmw_view_srf(view),
- vmw_view_dirtying(view));
+ ret = vmw_execbuf_res_val_add(sw_context, vmw_view_srf(view),
+ vmw_view_dirtying(view), vmw_val_add_flag_noctx);
if (ret)
return ret;
- return vmw_execbuf_res_noctx_val_add(sw_context, view,
- VMW_RES_DIRTY_NONE);
+ return vmw_execbuf_res_val_add(sw_context, view, VMW_RES_DIRTY_NONE,
+ vmw_val_add_flag_noctx);
}
/**
@@ -475,8 +450,9 @@ static int vmw_resource_context_res_add(struct vmw_private *dev_priv,
if (IS_ERR(res))
continue;
- ret = vmw_execbuf_res_noctx_val_add(sw_context, res,
- VMW_RES_DIRTY_SET);
+ ret = vmw_execbuf_res_val_add(sw_context, res,
+ VMW_RES_DIRTY_SET,
+ vmw_val_add_flag_noctx);
if (unlikely(ret != 0))
return ret;
}
@@ -490,9 +466,9 @@ static int vmw_resource_context_res_add(struct vmw_private *dev_priv,
if (vmw_res_type(entry->res) == vmw_res_view)
ret = vmw_view_res_val_add(sw_context, entry->res);
else
- ret = vmw_execbuf_res_noctx_val_add
- (sw_context, entry->res,
- vmw_binding_dirtying(entry->bt));
+ ret = vmw_execbuf_res_val_add(sw_context, entry->res,
+ vmw_binding_dirtying(entry->bt),
+ vmw_val_add_flag_noctx);
if (unlikely(ret != 0))
break;
}
@@ -658,7 +634,8 @@ vmw_cmd_res_check(struct vmw_private *dev_priv,
{
struct vmw_res_cache_entry *rcache = &sw_context->res_cache[res_type];
struct vmw_resource *res;
- int ret;
+ int ret = 0;
+ bool needs_unref = false;
if (p_res)
*p_res = NULL;
@@ -683,17 +660,18 @@ vmw_cmd_res_check(struct vmw_private *dev_priv,
if (ret)
return ret;
- res = vmw_user_resource_noref_lookup_handle
- (dev_priv, sw_context->fp->tfile, *id_loc, converter);
- if (IS_ERR(res)) {
+ ret = vmw_user_resource_lookup_handle
+ (dev_priv, sw_context->fp->tfile, *id_loc, converter, &res);
+ if (ret != 0) {
VMW_DEBUG_USER("Could not find/use resource 0x%08x.\n",
(unsigned int) *id_loc);
- return PTR_ERR(res);
+ return ret;
}
+ needs_unref = true;
- ret = vmw_execbuf_res_noref_val_add(sw_context, res, dirty);
+ ret = vmw_execbuf_res_val_add(sw_context, res, dirty, vmw_val_add_flag_none);
if (unlikely(ret != 0))
- return ret;
+ goto res_check_done;
if (rcache->valid && rcache->res == res) {
rcache->valid_handle = true;
@@ -708,7 +686,11 @@ vmw_cmd_res_check(struct vmw_private *dev_priv,
if (p_res)
*p_res = res;
- return 0;
+res_check_done:
+ if (needs_unref)
+ vmw_resource_unreference(&res);
+
+ return ret;
}
/**
@@ -1171,13 +1153,14 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv,
int ret;
vmw_validation_preload_bo(sw_context->ctx);
- vmw_bo = vmw_user_bo_noref_lookup(sw_context->filp, handle);
- if (IS_ERR(vmw_bo)) {
- VMW_DEBUG_USER("Could not find or use MOB buffer.\n");
+ ret = vmw_user_bo_lookup(sw_context->filp, handle, &vmw_bo);
+ if (ret != 0) {
+ drm_dbg(&dev_priv->drm, "Could not find or use MOB buffer.\n");
return PTR_ERR(vmw_bo);
}
ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo, true, false);
ttm_bo_put(&vmw_bo->base);
+ drm_gem_object_put(&vmw_bo->base.base);
if (unlikely(ret != 0))
return ret;
@@ -1225,13 +1208,14 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
int ret;
vmw_validation_preload_bo(sw_context->ctx);
- vmw_bo = vmw_user_bo_noref_lookup(sw_context->filp, handle);
- if (IS_ERR(vmw_bo)) {
- VMW_DEBUG_USER("Could not find or use GMR region.\n");
+ ret = vmw_user_bo_lookup(sw_context->filp, handle, &vmw_bo);
+ if (ret != 0) {
+ drm_dbg(&dev_priv->drm, "Could not find or use GMR region.\n");
return PTR_ERR(vmw_bo);
}
ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo, false, false);
ttm_bo_put(&vmw_bo->base);
+ drm_gem_object_put(&vmw_bo->base.base);
if (unlikely(ret != 0))
return ret;
@@ -2025,8 +2009,9 @@ static int vmw_cmd_set_shader(struct vmw_private *dev_priv,
res = vmw_shader_lookup(vmw_context_res_man(ctx),
cmd->body.shid, cmd->body.type);
if (!IS_ERR(res)) {
- ret = vmw_execbuf_res_noctx_val_add(sw_context, res,
- VMW_RES_DIRTY_NONE);
+ ret = vmw_execbuf_res_val_add(sw_context, res,
+ VMW_RES_DIRTY_NONE,
+ vmw_val_add_flag_noctx);
if (unlikely(ret != 0))
return ret;
@@ -2273,8 +2258,9 @@ static int vmw_cmd_dx_set_shader(struct vmw_private *dev_priv,
return PTR_ERR(res);
}
- ret = vmw_execbuf_res_noctx_val_add(sw_context, res,
- VMW_RES_DIRTY_NONE);
+ ret = vmw_execbuf_res_val_add(sw_context, res,
+ VMW_RES_DIRTY_NONE,
+ vmw_val_add_flag_noctx);
if (ret)
return ret;
}
@@ -2777,8 +2763,8 @@ static int vmw_cmd_dx_bind_shader(struct vmw_private *dev_priv,
return PTR_ERR(res);
}
- ret = vmw_execbuf_res_noctx_val_add(sw_context, res,
- VMW_RES_DIRTY_NONE);
+ ret = vmw_execbuf_res_val_add(sw_context, res, VMW_RES_DIRTY_NONE,
+ vmw_val_add_flag_noctx);
if (ret) {
VMW_DEBUG_USER("Error creating resource validation node.\n");
return ret;
@@ -3098,8 +3084,8 @@ static int vmw_cmd_dx_bind_streamoutput(struct vmw_private *dev_priv,
vmw_dx_streamoutput_set_size(res, cmd->body.sizeInBytes);
- ret = vmw_execbuf_res_noctx_val_add(sw_context, res,
- VMW_RES_DIRTY_NONE);
+ ret = vmw_execbuf_res_val_add(sw_context, res, VMW_RES_DIRTY_NONE,
+ vmw_val_add_flag_noctx);
if (ret) {
DRM_ERROR("Error creating resource validation node.\n");
return ret;
@@ -3148,8 +3134,8 @@ static int vmw_cmd_dx_set_streamoutput(struct vmw_private *dev_priv,
return 0;
}
- ret = vmw_execbuf_res_noctx_val_add(sw_context, res,
- VMW_RES_DIRTY_NONE);
+ ret = vmw_execbuf_res_val_add(sw_context, res, VMW_RES_DIRTY_NONE,
+ vmw_val_add_flag_noctx);
if (ret) {
DRM_ERROR("Error creating resource validation node.\n");
return ret;
@@ -4066,22 +4052,26 @@ static int vmw_execbuf_tie_context(struct vmw_private *dev_priv,
if (ret)
return ret;
- res = vmw_user_resource_noref_lookup_handle
+ ret = vmw_user_resource_lookup_handle
(dev_priv, sw_context->fp->tfile, handle,
- user_context_converter);
- if (IS_ERR(res)) {
+ user_context_converter, &res);
+ if (ret != 0) {
VMW_DEBUG_USER("Could not find or user DX context 0x%08x.\n",
(unsigned int) handle);
- return PTR_ERR(res);
+ return ret;
}
- ret = vmw_execbuf_res_noref_val_add(sw_context, res, VMW_RES_DIRTY_SET);
- if (unlikely(ret != 0))
+ ret = vmw_execbuf_res_val_add(sw_context, res, VMW_RES_DIRTY_SET,
+ vmw_val_add_flag_none);
+ if (unlikely(ret != 0)) {
+ vmw_resource_unreference(&res);
return ret;
+ }
sw_context->dx_ctx_node = vmw_execbuf_info_from_res(sw_context, res);
sw_context->man = vmw_context_res_man(res);
+ vmw_resource_unreference(&res);
return 0;
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c
index ce609e7d758f..4d2c28e39f4e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c
@@ -146,14 +146,12 @@ int vmw_gem_object_create_with_handle(struct vmw_private *dev_priv,
&vmw_sys_placement :
&vmw_vram_sys_placement,
true, false, &vmw_gem_destroy, p_vbo);
-
- (*p_vbo)->base.base.funcs = &vmw_gem_object_funcs;
if (ret != 0)
goto out_no_bo;
+ (*p_vbo)->base.base.funcs = &vmw_gem_object_funcs;
+
ret = drm_gem_handle_create(filp, &(*p_vbo)->base.base, handle);
- /* drop reference from allocate - handle holds it now */
- drm_gem_object_put(&(*p_vbo)->base.base);
out_no_bo:
return ret;
}
@@ -180,6 +178,8 @@ int vmw_gem_object_create_ioctl(struct drm_device *dev, void *data,
rep->map_handle = drm_vma_node_offset_addr(&vbo->base.base.vma_node);
rep->cur_gmr_id = handle;
rep->cur_gmr_offset = 0;
+ /* drop reference from allocate - handle holds it now */
+ drm_gem_object_put(&vbo->base.base);
out_no_bo:
return ret;
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 257f090071f1..445d619e1fdc 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -1815,8 +1815,10 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev,
err_out:
/* vmw_user_lookup_handle takes one ref so does new_fb */
- if (bo)
+ if (bo) {
vmw_bo_unreference(&bo);
+ drm_gem_object_put(&bo->base.base);
+ }
if (surface)
vmw_surface_unreference(&surface);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg_arm64.h b/drivers/gpu/drm/vmwgfx/vmwgfx_msg_arm64.h
index 4f40167ad61f..4f40167ad61f 100755..100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg_arm64.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg_arm64.h
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
index e9f5c89b4ca6..b5b311f2a91a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
@@ -458,6 +458,7 @@ int vmw_overlay_ioctl(struct drm_device *dev, void *data,
ret = vmw_overlay_update_stream(dev_priv, buf, arg, true);
vmw_bo_unreference(&buf);
+ drm_gem_object_put(&buf->base.base);
out_unlock:
mutex_unlock(&overlay->mutex);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index f66caa540e14..c7d645e5ec7b 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -281,39 +281,6 @@ out_bad_resource:
return ret;
}
-/**
- * vmw_user_resource_noref_lookup_handle - lookup a struct resource from a
- * TTM user-space handle and perform basic type checks
- *
- * @dev_priv: Pointer to a device private struct
- * @tfile: Pointer to a struct ttm_object_file identifying the caller
- * @handle: The TTM user-space handle
- * @converter: Pointer to an object describing the resource type
- *
- * If the handle can't be found or is associated with an incorrect resource
- * type, -EINVAL will be returned.
- */
-struct vmw_resource *
-vmw_user_resource_noref_lookup_handle(struct vmw_private *dev_priv,
- struct ttm_object_file *tfile,
- uint32_t handle,
- const struct vmw_user_resource_conv
- *converter)
-{
- struct ttm_base_object *base;
-
- base = ttm_base_object_noref_lookup(tfile, handle);
- if (!base)
- return ERR_PTR(-ESRCH);
-
- if (unlikely(ttm_base_object_type(base) != converter->object_type)) {
- ttm_base_object_noref_release();
- return ERR_PTR(-EINVAL);
- }
-
- return converter->base_obj_to_res(base);
-}
-
/*
* Helper function that looks either a surface or bo.
*
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c
index 108a496b5d18..51e83dfa1cac 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c
@@ -807,6 +807,7 @@ static int vmw_shader_define(struct drm_device *dev, struct drm_file *file_priv,
num_output_sig, tfile, shader_handle);
out_bad_arg:
vmw_bo_unreference(&buffer);
+ drm_gem_object_put(&buffer->base.base);
return ret;
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
index 3bc63ae768f3..dcfb003841b3 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
@@ -683,7 +683,7 @@ static void vmw_user_surface_base_release(struct ttm_base_object **p_base)
container_of(base, struct vmw_user_surface, prime.base);
struct vmw_resource *res = &user_srf->srf.res;
- if (base->shareable && res && res->backup)
+ if (res && res->backup)
drm_gem_object_put(&res->backup->base.base);
*p_base = NULL;
@@ -864,7 +864,11 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
goto out_unlock;
}
vmw_bo_reference(res->backup);
- drm_gem_object_get(&res->backup->base.base);
+ /*
+ * We don't expose the handle to the userspace and surface
+ * already holds a gem reference
+ */
+ drm_gem_handle_delete(file_priv, backup_handle);
}
tmp = vmw_resource_reference(&srf->res);
@@ -1568,8 +1572,6 @@ vmw_gb_surface_define_internal(struct drm_device *dev,
drm_vma_node_offset_addr(&res->backup->base.base.vma_node);
rep->buffer_size = res->backup->base.base.size;
rep->buffer_handle = backup_handle;
- if (user_srf->prime.base.shareable)
- drm_gem_object_get(&res->backup->base.base);
} else {
rep->buffer_map_handle = 0;
rep->buffer_size = 0;
diff --git a/drivers/gpu/drm/xen/xen_drm_front.c b/drivers/gpu/drm/xen/xen_drm_front.c
index 0d8e6bd1ccbf..90996c108146 100644
--- a/drivers/gpu/drm/xen/xen_drm_front.c
+++ b/drivers/gpu/drm/xen/xen_drm_front.c
@@ -717,7 +717,7 @@ static int xen_drv_probe(struct xenbus_device *xb_dev,
return xenbus_switch_state(xb_dev, XenbusStateInitialising);
}
-static int xen_drv_remove(struct xenbus_device *dev)
+static void xen_drv_remove(struct xenbus_device *dev)
{
struct xen_drm_front_info *front_info = dev_get_drvdata(&dev->dev);
int to = 100;
@@ -751,7 +751,6 @@ static int xen_drv_remove(struct xenbus_device *dev)
xen_drm_drv_fini(front_info);
xenbus_frontend_closed(dev);
- return 0;
}
static const struct xenbus_device_id xen_driver_ids[] = {
diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_client.c b/drivers/hid/amd-sfh-hid/amd_sfh_client.c
index ab125f79408f..c751d12f5df8 100644
--- a/drivers/hid/amd-sfh-hid/amd_sfh_client.c
+++ b/drivers/hid/amd-sfh-hid/amd_sfh_client.c
@@ -227,6 +227,7 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata)
cl_data->num_hid_devices = amd_mp2_get_sensor_num(privdata, &cl_data->sensor_idx[0]);
if (cl_data->num_hid_devices == 0)
return -ENODEV;
+ cl_data->is_any_sensor_enabled = false;
INIT_DELAYED_WORK(&cl_data->work, amd_sfh_work);
INIT_DELAYED_WORK(&cl_data->work_buffer, amd_sfh_work_buffer);
@@ -282,11 +283,12 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata)
}
rc = mp2_ops->get_rep_desc(cl_idx, cl_data->report_descr[i]);
if (rc)
- return rc;
+ goto cleanup;
mp2_ops->start(privdata, info);
status = amd_sfh_wait_for_response
(privdata, cl_data->sensor_idx[i], SENSOR_ENABLED);
if (status == SENSOR_ENABLED) {
+ cl_data->is_any_sensor_enabled = true;
cl_data->sensor_sts[i] = SENSOR_ENABLED;
rc = amdtp_hid_probe(cl_data->cur_hid_dev, cl_data);
if (rc) {
@@ -301,19 +303,26 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata)
cl_data->sensor_sts[i]);
goto cleanup;
}
+ } else {
+ cl_data->sensor_sts[i] = SENSOR_DISABLED;
+ dev_dbg(dev, "sid 0x%x (%s) status 0x%x\n",
+ cl_data->sensor_idx[i],
+ get_sensor_name(cl_data->sensor_idx[i]),
+ cl_data->sensor_sts[i]);
}
dev_dbg(dev, "sid 0x%x (%s) status 0x%x\n",
cl_data->sensor_idx[i], get_sensor_name(cl_data->sensor_idx[i]),
cl_data->sensor_sts[i]);
}
- if (mp2_ops->discovery_status && mp2_ops->discovery_status(privdata) == 0) {
+ if (!cl_data->is_any_sensor_enabled ||
+ (mp2_ops->discovery_status && mp2_ops->discovery_status(privdata) == 0)) {
amd_sfh_hid_client_deinit(privdata);
for (i = 0; i < cl_data->num_hid_devices; i++) {
devm_kfree(dev, cl_data->feature_report[i]);
devm_kfree(dev, in_data->input_report[i]);
devm_kfree(dev, cl_data->report_descr[i]);
}
- dev_warn(dev, "Failed to discover, sensors not enabled\n");
+ dev_warn(dev, "Failed to discover, sensors not enabled is %d\n", cl_data->is_any_sensor_enabled);
return -EOPNOTSUPP;
}
schedule_delayed_work(&cl_data->work_buffer, msecs_to_jiffies(AMD_SFH_IDLE_LOOP));
diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_hid.h b/drivers/hid/amd-sfh-hid/amd_sfh_hid.h
index 3754fb423e3a..528036892c9d 100644
--- a/drivers/hid/amd-sfh-hid/amd_sfh_hid.h
+++ b/drivers/hid/amd-sfh-hid/amd_sfh_hid.h
@@ -32,6 +32,7 @@ struct amd_input_data {
struct amdtp_cl_data {
u8 init_done;
u32 cur_hid_dev;
+ bool is_any_sensor_enabled;
u32 hid_dev_count;
u32 num_hid_devices;
struct device_info *hid_devices;
diff --git a/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c b/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c
index 4da2f9f62aba..a1d6e08fab7d 100644
--- a/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c
+++ b/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c
@@ -160,7 +160,7 @@ static int amd_sfh1_1_hid_client_init(struct amd_mp2_dev *privdata)
}
rc = mp2_ops->get_rep_desc(cl_idx, cl_data->report_descr[i]);
if (rc)
- return rc;
+ goto cleanup;
writel(0, privdata->mmio + AMD_P2C_MSG(0));
mp2_ops->start(privdata, info);
diff --git a/drivers/hid/hid-betopff.c b/drivers/hid/hid-betopff.c
index 467d789f9bc2..25ed7b9a917e 100644
--- a/drivers/hid/hid-betopff.c
+++ b/drivers/hid/hid-betopff.c
@@ -60,7 +60,6 @@ static int betopff_init(struct hid_device *hid)
struct list_head *report_list =
&hid->report_enum[HID_OUTPUT_REPORT].report_list;
struct input_dev *dev;
- int field_count = 0;
int error;
int i, j;
@@ -86,19 +85,21 @@ static int betopff_init(struct hid_device *hid)
* -----------------------------------------
* Do init them with default value.
*/
+ if (report->maxfield < 4) {
+ hid_err(hid, "not enough fields in the report: %d\n",
+ report->maxfield);
+ return -ENODEV;
+ }
for (i = 0; i < report->maxfield; i++) {
+ if (report->field[i]->report_count < 1) {
+ hid_err(hid, "no values in the field\n");
+ return -ENODEV;
+ }
for (j = 0; j < report->field[i]->report_count; j++) {
report->field[i]->value[j] = 0x00;
- field_count++;
}
}
- if (field_count < 4) {
- hid_err(hid, "not enough fields in the report: %d\n",
- field_count);
- return -ENODEV;
- }
-
betopff = kzalloc(sizeof(*betopff), GFP_KERNEL);
if (!betopff)
return -ENOMEM;
diff --git a/drivers/hid/hid-bigbenff.c b/drivers/hid/hid-bigbenff.c
index e8c5e3ac9fff..e8b16665860d 100644
--- a/drivers/hid/hid-bigbenff.c
+++ b/drivers/hid/hid-bigbenff.c
@@ -344,6 +344,11 @@ static int bigben_probe(struct hid_device *hid,
}
report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list;
+ if (list_empty(report_list)) {
+ hid_err(hid, "no output report found\n");
+ error = -ENODEV;
+ goto error_hw_stop;
+ }
bigben->report = list_entry(report_list->next,
struct hid_report, list);
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index bd47628da6be..5c72aef3d3dd 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -993,8 +993,8 @@ struct hid_report *hid_validate_values(struct hid_device *hid,
* Validating on id 0 means we should examine the first
* report in the list.
*/
- report = list_entry(
- hid->report_enum[type].report_list.next,
+ report = list_first_entry_or_null(
+ &hid->report_enum[type].report_list,
struct hid_report, list);
} else {
report = hid->report_enum[type].report_id_hash[id];
@@ -1202,6 +1202,7 @@ int hid_open_report(struct hid_device *device)
__u8 *end;
__u8 *next;
int ret;
+ int i;
static int (*dispatch_type[])(struct hid_parser *parser,
struct hid_item *item) = {
hid_parser_main,
@@ -1252,6 +1253,8 @@ int hid_open_report(struct hid_device *device)
goto err;
}
device->collection_size = HID_DEFAULT_NUM_COLLECTIONS;
+ for (i = 0; i < HID_DEFAULT_NUM_COLLECTIONS; i++)
+ device->collection[i].parent_idx = -1;
ret = -EINVAL;
while ((next = fetch_item(start, end, &item)) != NULL) {
diff --git a/drivers/hid/hid-elecom.c b/drivers/hid/hid-elecom.c
index e59e9911fc37..4fa45ee77503 100644
--- a/drivers/hid/hid-elecom.c
+++ b/drivers/hid/hid-elecom.c
@@ -12,6 +12,7 @@
* Copyright (c) 2017 Alex Manoussakis <amanou@gnu.org>
* Copyright (c) 2017 Tomasz Kramkowski <tk@the-tk.com>
* Copyright (c) 2020 YOSHIOKA Takuma <lo48576@hard-wi.red>
+ * Copyright (c) 2022 Takahiro Fujii <fujii@xaxxi.net>
*/
/*
@@ -89,7 +90,7 @@ static __u8 *elecom_report_fixup(struct hid_device *hdev, __u8 *rdesc,
case USB_DEVICE_ID_ELECOM_M_DT1URBK:
case USB_DEVICE_ID_ELECOM_M_DT1DRBK:
case USB_DEVICE_ID_ELECOM_M_HT1URBK:
- case USB_DEVICE_ID_ELECOM_M_HT1DRBK:
+ case USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D:
/*
* Report descriptor format:
* 12: button bit count
@@ -99,6 +100,16 @@ static __u8 *elecom_report_fixup(struct hid_device *hdev, __u8 *rdesc,
*/
mouse_button_fixup(hdev, rdesc, *rsize, 12, 30, 14, 20, 8);
break;
+ case USB_DEVICE_ID_ELECOM_M_HT1DRBK_011C:
+ /*
+ * Report descriptor format:
+ * 22: button bit count
+ * 30: padding bit count
+ * 24: button report size
+ * 16: button usage maximum
+ */
+ mouse_button_fixup(hdev, rdesc, *rsize, 22, 30, 24, 16, 8);
+ break;
}
return rdesc;
}
@@ -112,7 +123,8 @@ static const struct hid_device_id elecom_devices[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1URBK) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1DRBK) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK) },
- { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_011C) },
{ }
};
MODULE_DEVICE_TABLE(hid, elecom_devices);
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 82713ef3aaa6..9e36b4cd905e 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -274,7 +274,6 @@
#define USB_DEVICE_ID_CH_AXIS_295 0x001c
#define USB_VENDOR_ID_CHERRY 0x046a
-#define USB_DEVICE_ID_CHERRY_MOUSE_000C 0x000c
#define USB_DEVICE_ID_CHERRY_CYMOTION 0x0023
#define USB_DEVICE_ID_CHERRY_CYMOTION_SOLAR 0x0027
@@ -414,6 +413,8 @@
#define I2C_DEVICE_ID_HP_ENVY_X360_15T_DR100 0x29CF
#define I2C_DEVICE_ID_HP_ENVY_X360_EU0009NV 0x2CF9
#define I2C_DEVICE_ID_HP_SPECTRE_X360_15 0x2817
+#define I2C_DEVICE_ID_HP_SPECTRE_X360_13_AW0020NG 0x29DF
+#define I2C_DEVICE_ID_ASUS_TP420IA_TOUCHSCREEN 0x2BC8
#define USB_DEVICE_ID_ASUS_UX550VE_TOUCHSCREEN 0x2544
#define USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN 0x2706
#define I2C_DEVICE_ID_SURFACE_GO_TOUCHSCREEN 0x261A
@@ -429,7 +430,8 @@
#define USB_DEVICE_ID_ELECOM_M_DT1URBK 0x00fe
#define USB_DEVICE_ID_ELECOM_M_DT1DRBK 0x00ff
#define USB_DEVICE_ID_ELECOM_M_HT1URBK 0x010c
-#define USB_DEVICE_ID_ELECOM_M_HT1DRBK 0x010d
+#define USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D 0x010d
+#define USB_DEVICE_ID_ELECOM_M_HT1DRBK_011C 0x011c
#define USB_VENDOR_ID_DREAM_CHEEKY 0x1d34
#define USB_DEVICE_ID_DREAM_CHEEKY_WN 0x0004
@@ -1295,6 +1297,7 @@
#define USB_DEVICE_ID_UGEE_XPPEN_TABLET_G540 0x0075
#define USB_DEVICE_ID_UGEE_XPPEN_TABLET_G640 0x0094
#define USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO01 0x0042
+#define USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO01_V2 0x0905
#define USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO_L 0x0935
#define USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO_PRO_S 0x0909
#define USB_DEVICE_ID_UGEE_XPPEN_TABLET_STAR06 0x0078
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 9b59e436df0a..77c8c49852b5 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -370,6 +370,8 @@ static const struct hid_device_id hid_battery_quirks[] = {
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH,
USB_DEVICE_ID_LOGITECH_DINOVO_EDGE_KBD),
HID_BATTERY_QUIRK_IGNORE },
+ { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_ASUS_TP420IA_TOUCHSCREEN),
+ HID_BATTERY_QUIRK_IGNORE },
{ HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN),
HID_BATTERY_QUIRK_IGNORE },
{ HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ASUS_UX550VE_TOUCHSCREEN),
@@ -384,6 +386,8 @@ static const struct hid_device_id hid_battery_quirks[] = {
HID_BATTERY_QUIRK_IGNORE },
{ HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_SPECTRE_X360_15),
HID_BATTERY_QUIRK_IGNORE },
+ { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_SPECTRE_X360_13_AW0020NG),
+ HID_BATTERY_QUIRK_IGNORE },
{ HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_SURFACE_GO_TOUCHSCREEN),
HID_BATTERY_QUIRK_IGNORE },
{ HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_SURFACE_GO2_TOUCHSCREEN),
diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c
index abf2c95e4d0b..9c1ee8e91e0c 100644
--- a/drivers/hid/hid-logitech-hidpp.c
+++ b/drivers/hid/hid-logitech-hidpp.c
@@ -3978,7 +3978,8 @@ static void hidpp_connect_event(struct hidpp_device *hidpp)
}
hidpp_initialize_battery(hidpp);
- hidpp_initialize_hires_scroll(hidpp);
+ if (!hid_is_usb(hidpp->hid_dev))
+ hidpp_initialize_hires_scroll(hidpp);
/* forward current battery state */
if (hidpp->capabilities & HIDPP_CAPABILITY_HIDPP10_BATTERY) {
diff --git a/drivers/hid/hid-playstation.c b/drivers/hid/hid-playstation.c
index f399bf0d3c8c..27c40894acab 100644
--- a/drivers/hid/hid-playstation.c
+++ b/drivers/hid/hid-playstation.c
@@ -944,6 +944,7 @@ ATTRIBUTE_GROUPS(ps_device);
static int dualsense_get_calibration_data(struct dualsense *ds)
{
+ struct hid_device *hdev = ds->base.hdev;
short gyro_pitch_bias, gyro_pitch_plus, gyro_pitch_minus;
short gyro_yaw_bias, gyro_yaw_plus, gyro_yaw_minus;
short gyro_roll_bias, gyro_roll_plus, gyro_roll_minus;
@@ -954,6 +955,7 @@ static int dualsense_get_calibration_data(struct dualsense *ds)
int speed_2x;
int range_2g;
int ret = 0;
+ int i;
uint8_t *buf;
buf = kzalloc(DS_FEATURE_REPORT_CALIBRATION_SIZE, GFP_KERNEL);
@@ -1006,6 +1008,21 @@ static int dualsense_get_calibration_data(struct dualsense *ds)
ds->gyro_calib_data[2].sens_denom = gyro_roll_plus - gyro_roll_minus;
/*
+ * Sanity check gyro calibration data. This is needed to prevent crashes
+ * during report handling of virtual, clone or broken devices not implementing
+ * calibration data properly.
+ */
+ for (i = 0; i < ARRAY_SIZE(ds->gyro_calib_data); i++) {
+ if (ds->gyro_calib_data[i].sens_denom == 0) {
+ hid_warn(hdev, "Invalid gyro calibration data for axis (%d), disabling calibration.",
+ ds->gyro_calib_data[i].abs_code);
+ ds->gyro_calib_data[i].bias = 0;
+ ds->gyro_calib_data[i].sens_numer = DS_GYRO_RANGE;
+ ds->gyro_calib_data[i].sens_denom = S16_MAX;
+ }
+ }
+
+ /*
* Set accelerometer calibration and normalization parameters.
* Data values will be normalized to 1/DS_ACC_RES_PER_G g.
*/
@@ -1027,6 +1044,21 @@ static int dualsense_get_calibration_data(struct dualsense *ds)
ds->accel_calib_data[2].sens_numer = 2*DS_ACC_RES_PER_G;
ds->accel_calib_data[2].sens_denom = range_2g;
+ /*
+ * Sanity check accelerometer calibration data. This is needed to prevent crashes
+ * during report handling of virtual, clone or broken devices not implementing calibration
+ * data properly.
+ */
+ for (i = 0; i < ARRAY_SIZE(ds->accel_calib_data); i++) {
+ if (ds->accel_calib_data[i].sens_denom == 0) {
+ hid_warn(hdev, "Invalid accelerometer calibration data for axis (%d), disabling calibration.",
+ ds->accel_calib_data[i].abs_code);
+ ds->accel_calib_data[i].bias = 0;
+ ds->accel_calib_data[i].sens_numer = DS_ACC_RANGE;
+ ds->accel_calib_data[i].sens_denom = S16_MAX;
+ }
+ }
+
err_free:
kfree(buf);
return ret;
@@ -1737,6 +1769,7 @@ static int dualshock4_get_calibration_data(struct dualshock4 *ds4)
int speed_2x;
int range_2g;
int ret = 0;
+ int i;
uint8_t *buf;
if (ds4->base.hdev->bus == BUS_USB) {
@@ -1831,6 +1864,21 @@ static int dualshock4_get_calibration_data(struct dualshock4 *ds4)
ds4->gyro_calib_data[2].sens_denom = gyro_roll_plus - gyro_roll_minus;
/*
+ * Sanity check gyro calibration data. This is needed to prevent crashes
+ * during report handling of virtual, clone or broken devices not implementing
+ * calibration data properly.
+ */
+ for (i = 0; i < ARRAY_SIZE(ds4->gyro_calib_data); i++) {
+ if (ds4->gyro_calib_data[i].sens_denom == 0) {
+ hid_warn(hdev, "Invalid gyro calibration data for axis (%d), disabling calibration.",
+ ds4->gyro_calib_data[i].abs_code);
+ ds4->gyro_calib_data[i].bias = 0;
+ ds4->gyro_calib_data[i].sens_numer = DS4_GYRO_RANGE;
+ ds4->gyro_calib_data[i].sens_denom = S16_MAX;
+ }
+ }
+
+ /*
* Set accelerometer calibration and normalization parameters.
* Data values will be normalized to 1/DS4_ACC_RES_PER_G g.
*/
@@ -1852,6 +1900,21 @@ static int dualshock4_get_calibration_data(struct dualshock4 *ds4)
ds4->accel_calib_data[2].sens_numer = 2*DS4_ACC_RES_PER_G;
ds4->accel_calib_data[2].sens_denom = range_2g;
+ /*
+ * Sanity check accelerometer calibration data. This is needed to prevent crashes
+ * during report handling of virtual, clone or broken devices not implementing calibration
+ * data properly.
+ */
+ for (i = 0; i < ARRAY_SIZE(ds4->accel_calib_data); i++) {
+ if (ds4->accel_calib_data[i].sens_denom == 0) {
+ hid_warn(hdev, "Invalid accelerometer calibration data for axis (%d), disabling calibration.",
+ ds4->accel_calib_data[i].abs_code);
+ ds4->accel_calib_data[i].bias = 0;
+ ds4->accel_calib_data[i].sens_numer = DS4_ACC_RANGE;
+ ds4->accel_calib_data[i].sens_denom = S16_MAX;
+ }
+ }
+
err_free:
kfree(buf);
return ret;
diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
index 0e9702c7f7d6..5bc91f68b374 100644
--- a/drivers/hid/hid-quirks.c
+++ b/drivers/hid/hid-quirks.c
@@ -54,7 +54,6 @@ static const struct hid_device_id hid_quirks[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_FLIGHT_SIM_YOKE), HID_QUIRK_NOGET },
{ HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_PRO_PEDALS), HID_QUIRK_NOGET },
{ HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_PRO_THROTTLE), HID_QUIRK_NOGET },
- { HID_USB_DEVICE(USB_VENDOR_ID_CHERRY, USB_DEVICE_ID_CHERRY_MOUSE_000C), HID_QUIRK_ALWAYS_POLL },
{ HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB), HID_QUIRK_NO_INIT_REPORTS },
{ HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE), HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
{ HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB), HID_QUIRK_NO_INIT_REPORTS },
@@ -394,7 +393,8 @@ static const struct hid_device_id hid_have_special_driver[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1URBK) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1DRBK) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK) },
- { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_011C) },
#endif
#if IS_ENABLED(CONFIG_HID_ELO)
{ HID_USB_DEVICE(USB_VENDOR_ID_ELO, 0x0009) },
diff --git a/drivers/hid/hid-uclogic-core.c b/drivers/hid/hid-uclogic-core.c
index 7fa6fe04f1b2..cfbbc39807a6 100644
--- a/drivers/hid/hid-uclogic-core.c
+++ b/drivers/hid/hid-uclogic-core.c
@@ -526,6 +526,8 @@ static const struct hid_device_id uclogic_devices[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_UGEE,
USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO01) },
{ HID_USB_DEVICE(USB_VENDOR_ID_UGEE,
+ USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO01_V2) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_UGEE,
USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO_L) },
{ HID_USB_DEVICE(USB_VENDOR_ID_UGEE,
USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO_PRO_S) },
diff --git a/drivers/hid/hid-uclogic-params.c b/drivers/hid/hid-uclogic-params.c
index cd1233d7e253..3c5eea3df328 100644
--- a/drivers/hid/hid-uclogic-params.c
+++ b/drivers/hid/hid-uclogic-params.c
@@ -1656,6 +1656,8 @@ int uclogic_params_init(struct uclogic_params *params,
case VID_PID(USB_VENDOR_ID_UGEE,
USB_DEVICE_ID_UGEE_PARBLO_A610_PRO):
case VID_PID(USB_VENDOR_ID_UGEE,
+ USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO01_V2):
+ case VID_PID(USB_VENDOR_ID_UGEE,
USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO_L):
case VID_PID(USB_VENDOR_ID_UGEE,
USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO_PRO_S):
diff --git a/drivers/hid/intel-ish-hid/ishtp/dma-if.c b/drivers/hid/intel-ish-hid/ishtp/dma-if.c
index 40554c8daca0..00046cbfd4ed 100644
--- a/drivers/hid/intel-ish-hid/ishtp/dma-if.c
+++ b/drivers/hid/intel-ish-hid/ishtp/dma-if.c
@@ -104,6 +104,11 @@ void *ishtp_cl_get_dma_send_buf(struct ishtp_device *dev,
int required_slots = (size / DMA_SLOT_SIZE)
+ 1 * (size % DMA_SLOT_SIZE != 0);
+ if (!dev->ishtp_dma_tx_map) {
+ dev_err(dev->devc, "Fail to allocate Tx map\n");
+ return NULL;
+ }
+
spin_lock_irqsave(&dev->ishtp_dma_tx_lock, flags);
for (i = 0; i <= (dev->ishtp_dma_num_slots - required_slots); i++) {
free = 1;
@@ -150,6 +155,11 @@ void ishtp_cl_release_dma_acked_mem(struct ishtp_device *dev,
return;
}
+ if (!dev->ishtp_dma_tx_map) {
+ dev_err(dev->devc, "Fail to allocate Tx map\n");
+ return;
+ }
+
i = (msg_addr - dev->ishtp_host_dma_tx_buf) / DMA_SLOT_SIZE;
spin_lock_irqsave(&dev->ishtp_dma_tx_lock, flags);
for (j = 0; j < acked_slots; j++) {
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index cbe43e2567a7..64ac5bdee3a6 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -1963,7 +1963,7 @@ static void hv_balloon_debugfs_init(struct hv_dynmem_device *b)
static void hv_balloon_debugfs_exit(struct hv_dynmem_device *b)
{
- debugfs_remove(debugfs_lookup("hv-balloon", NULL));
+ debugfs_lookup_and_remove("hv-balloon", NULL);
}
#else
diff --git a/drivers/i2c/busses/i2c-axxia.c b/drivers/i2c/busses/i2c-axxia.c
index bdf3b50de8ad..c1c74ce08407 100644
--- a/drivers/i2c/busses/i2c-axxia.c
+++ b/drivers/i2c/busses/i2c-axxia.c
@@ -118,7 +118,7 @@
#define SDA_HOLD_TIME 0x90
/**
- * axxia_i2c_dev - I2C device context
+ * struct axxia_i2c_dev - I2C device context
* @base: pointer to register struct
* @msg: pointer to current message
* @msg_r: pointer to current read message (sequence transfer)
diff --git a/drivers/i2c/busses/i2c-designware-common.c b/drivers/i2c/busses/i2c-designware-common.c
index a3240ece55b2..581e02cc979a 100644
--- a/drivers/i2c/busses/i2c-designware-common.c
+++ b/drivers/i2c/busses/i2c-designware-common.c
@@ -351,7 +351,8 @@ u32 i2c_dw_scl_hcnt(u32 ic_clk, u32 tSYMBOL, u32 tf, int cond, int offset)
*
* If your hardware is free from tHD;STA issue, try this one.
*/
- return DIV_ROUND_CLOSEST(ic_clk * tSYMBOL, MICRO) - 8 + offset;
+ return DIV_ROUND_CLOSEST_ULL((u64)ic_clk * tSYMBOL, MICRO) -
+ 8 + offset;
else
/*
* Conditional expression:
@@ -367,7 +368,8 @@ u32 i2c_dw_scl_hcnt(u32 ic_clk, u32 tSYMBOL, u32 tf, int cond, int offset)
* The reason why we need to take into account "tf" here,
* is the same as described in i2c_dw_scl_lcnt().
*/
- return DIV_ROUND_CLOSEST(ic_clk * (tSYMBOL + tf), MICRO) - 3 + offset;
+ return DIV_ROUND_CLOSEST_ULL((u64)ic_clk * (tSYMBOL + tf), MICRO) -
+ 3 + offset;
}
u32 i2c_dw_scl_lcnt(u32 ic_clk, u32 tLOW, u32 tf, int offset)
@@ -383,7 +385,8 @@ u32 i2c_dw_scl_lcnt(u32 ic_clk, u32 tLOW, u32 tf, int offset)
* account the fall time of SCL signal (tf). Default tf value
* should be 0.3 us, for safety.
*/
- return DIV_ROUND_CLOSEST(ic_clk * (tLOW + tf), MICRO) - 1 + offset;
+ return DIV_ROUND_CLOSEST_ULL((u64)ic_clk * (tLOW + tf), MICRO) -
+ 1 + offset;
}
int i2c_dw_set_sda_hold(struct dw_i2c_dev *dev)
diff --git a/drivers/i2c/busses/i2c-designware-pcidrv.c b/drivers/i2c/busses/i2c-designware-pcidrv.c
index e499f96506c5..782fe1ef3ca1 100644
--- a/drivers/i2c/busses/i2c-designware-pcidrv.c
+++ b/drivers/i2c/busses/i2c-designware-pcidrv.c
@@ -396,6 +396,8 @@ static const struct pci_device_id i2_designware_pci_ids[] = {
{ PCI_VDEVICE(ATI, 0x73a4), navi_amd },
{ PCI_VDEVICE(ATI, 0x73e4), navi_amd },
{ PCI_VDEVICE(ATI, 0x73c4), navi_amd },
+ { PCI_VDEVICE(ATI, 0x7444), navi_amd },
+ { PCI_VDEVICE(ATI, 0x7464), navi_amd },
{ 0,}
};
MODULE_DEVICE_TABLE(pci, i2_designware_pci_ids);
diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
index ba043b547393..74182db03a88 100644
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c
@@ -351,13 +351,11 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
if (dev->flags & ACCESS_NO_IRQ_SUSPEND) {
dev_pm_set_driver_flags(&pdev->dev,
- DPM_FLAG_SMART_PREPARE |
- DPM_FLAG_MAY_SKIP_RESUME);
+ DPM_FLAG_SMART_PREPARE);
} else {
dev_pm_set_driver_flags(&pdev->dev,
DPM_FLAG_SMART_PREPARE |
- DPM_FLAG_SMART_SUSPEND |
- DPM_FLAG_MAY_SKIP_RESUME);
+ DPM_FLAG_SMART_SUSPEND);
}
device_enable_async_suspend(&pdev->dev);
@@ -419,21 +417,8 @@ static int dw_i2c_plat_prepare(struct device *dev)
*/
return !has_acpi_companion(dev);
}
-
-static void dw_i2c_plat_complete(struct device *dev)
-{
- /*
- * The device can only be in runtime suspend at this point if it has not
- * been resumed throughout the ending system suspend/resume cycle, so if
- * the platform firmware might mess up with it, request the runtime PM
- * framework to resume it.
- */
- if (pm_runtime_suspended(dev) && pm_resume_via_firmware())
- pm_request_resume(dev);
-}
#else
#define dw_i2c_plat_prepare NULL
-#define dw_i2c_plat_complete NULL
#endif
#ifdef CONFIG_PM
@@ -483,7 +468,6 @@ static int __maybe_unused dw_i2c_plat_resume(struct device *dev)
static const struct dev_pm_ops dw_i2c_dev_pm_ops = {
.prepare = dw_i2c_plat_prepare,
- .complete = dw_i2c_plat_complete,
SET_LATE_SYSTEM_SLEEP_PM_OPS(dw_i2c_plat_suspend, dw_i2c_plat_resume)
SET_RUNTIME_PM_OPS(dw_i2c_plat_runtime_suspend, dw_i2c_plat_runtime_resume, NULL)
};
diff --git a/drivers/i2c/busses/i2c-mxs.c b/drivers/i2c/busses/i2c-mxs.c
index 5af5cffc444e..d113bed79545 100644
--- a/drivers/i2c/busses/i2c-mxs.c
+++ b/drivers/i2c/busses/i2c-mxs.c
@@ -826,8 +826,8 @@ static int mxs_i2c_probe(struct platform_device *pdev)
/* Setup the DMA */
i2c->dmach = dma_request_chan(dev, "rx-tx");
if (IS_ERR(i2c->dmach)) {
- dev_err(dev, "Failed to request dma\n");
- return PTR_ERR(i2c->dmach);
+ return dev_err_probe(dev, PTR_ERR(i2c->dmach),
+ "Failed to request dma\n");
}
platform_set_drvdata(pdev, i2c);
diff --git a/drivers/i2c/busses/i2c-rk3x.c b/drivers/i2c/busses/i2c-rk3x.c
index d1658ed76562..b31cf4f18f85 100644
--- a/drivers/i2c/busses/i2c-rk3x.c
+++ b/drivers/i2c/busses/i2c-rk3x.c
@@ -80,7 +80,7 @@ enum {
#define DEFAULT_SCL_RATE (100 * 1000) /* Hz */
/**
- * struct i2c_spec_values:
+ * struct i2c_spec_values - I2C specification values for various modes
* @min_hold_start_ns: min hold time (repeated) START condition
* @min_low_ns: min LOW period of the SCL clock
* @min_high_ns: min HIGH period of the SCL cloc
@@ -136,7 +136,7 @@ static const struct i2c_spec_values fast_mode_plus_spec = {
};
/**
- * struct rk3x_i2c_calced_timings:
+ * struct rk3x_i2c_calced_timings - calculated V1 timings
* @div_low: Divider output for low
* @div_high: Divider output for high
* @tuning: Used to adjust setup/hold data time,
@@ -159,7 +159,7 @@ enum rk3x_i2c_state {
};
/**
- * struct rk3x_i2c_soc_data:
+ * struct rk3x_i2c_soc_data - SOC-specific data
* @grf_offset: offset inside the grf regmap for setting the i2c type
* @calc_timings: Callback function for i2c timing information calculated
*/
@@ -239,7 +239,8 @@ static inline void rk3x_i2c_clean_ipd(struct rk3x_i2c *i2c)
}
/**
- * Generate a START condition, which triggers a REG_INT_START interrupt.
+ * rk3x_i2c_start - Generate a START condition, which triggers a REG_INT_START interrupt.
+ * @i2c: target controller data
*/
static void rk3x_i2c_start(struct rk3x_i2c *i2c)
{
@@ -258,8 +259,8 @@ static void rk3x_i2c_start(struct rk3x_i2c *i2c)
}
/**
- * Generate a STOP condition, which triggers a REG_INT_STOP interrupt.
- *
+ * rk3x_i2c_stop - Generate a STOP condition, which triggers a REG_INT_STOP interrupt.
+ * @i2c: target controller data
* @error: Error code to return in rk3x_i2c_xfer
*/
static void rk3x_i2c_stop(struct rk3x_i2c *i2c, int error)
@@ -298,7 +299,8 @@ static void rk3x_i2c_stop(struct rk3x_i2c *i2c, int error)
}
/**
- * Setup a read according to i2c->msg
+ * rk3x_i2c_prepare_read - Setup a read according to i2c->msg
+ * @i2c: target controller data
*/
static void rk3x_i2c_prepare_read(struct rk3x_i2c *i2c)
{
@@ -329,7 +331,8 @@ static void rk3x_i2c_prepare_read(struct rk3x_i2c *i2c)
}
/**
- * Fill the transmit buffer with data from i2c->msg
+ * rk3x_i2c_fill_transmit_buf - Fill the transmit buffer with data from i2c->msg
+ * @i2c: target controller data
*/
static void rk3x_i2c_fill_transmit_buf(struct rk3x_i2c *i2c)
{
@@ -532,11 +535,10 @@ out:
}
/**
- * Get timing values of I2C specification
- *
+ * rk3x_i2c_get_spec - Get timing values of I2C specification
* @speed: Desired SCL frequency
*
- * Returns: Matched i2c spec values.
+ * Return: Matched i2c_spec_values.
*/
static const struct i2c_spec_values *rk3x_i2c_get_spec(unsigned int speed)
{
@@ -549,13 +551,12 @@ static const struct i2c_spec_values *rk3x_i2c_get_spec(unsigned int speed)
}
/**
- * Calculate divider values for desired SCL frequency
- *
+ * rk3x_i2c_v0_calc_timings - Calculate divider values for desired SCL frequency
* @clk_rate: I2C input clock rate
* @t: Known I2C timing information
* @t_calc: Caculated rk3x private timings that would be written into regs
*
- * Returns: 0 on success, -EINVAL if the goal SCL rate is too slow. In that case
+ * Return: %0 on success, -%EINVAL if the goal SCL rate is too slow. In that case
* a best-effort divider value is returned in divs. If the target rate is
* too high, we silently use the highest possible rate.
*/
@@ -710,13 +711,12 @@ static int rk3x_i2c_v0_calc_timings(unsigned long clk_rate,
}
/**
- * Calculate timing values for desired SCL frequency
- *
+ * rk3x_i2c_v1_calc_timings - Calculate timing values for desired SCL frequency
* @clk_rate: I2C input clock rate
* @t: Known I2C timing information
* @t_calc: Caculated rk3x private timings that would be written into regs
*
- * Returns: 0 on success, -EINVAL if the goal SCL rate is too slow. In that case
+ * Return: %0 on success, -%EINVAL if the goal SCL rate is too slow. In that case
* a best-effort divider value is returned in divs. If the target rate is
* too high, we silently use the highest possible rate.
* The following formulas are v1's method to calculate timings.
@@ -960,14 +960,14 @@ static int rk3x_i2c_clk_notifier_cb(struct notifier_block *nb, unsigned long
}
/**
- * Setup I2C registers for an I2C operation specified by msgs, num.
- *
- * Must be called with i2c->lock held.
- *
+ * rk3x_i2c_setup - Setup I2C registers for an I2C operation specified by msgs, num.
+ * @i2c: target controller data
* @msgs: I2C msgs to process
* @num: Number of msgs
*
- * returns: Number of I2C msgs processed or negative in case of error
+ * Must be called with i2c->lock held.
+ *
+ * Return: Number of I2C msgs processed or negative in case of error
*/
static int rk3x_i2c_setup(struct rk3x_i2c *i2c, struct i2c_msg *msgs, int num)
{
diff --git a/drivers/iio/accel/hid-sensor-accel-3d.c b/drivers/iio/accel/hid-sensor-accel-3d.c
index a2def6f9380a..5eac7ea19993 100644
--- a/drivers/iio/accel/hid-sensor-accel-3d.c
+++ b/drivers/iio/accel/hid-sensor-accel-3d.c
@@ -280,6 +280,7 @@ static int accel_3d_capture_sample(struct hid_sensor_hub_device *hsdev,
hid_sensor_convert_timestamp(
&accel_state->common_attributes,
*(int64_t *)raw_data);
+ ret = 0;
break;
default:
break;
diff --git a/drivers/iio/adc/berlin2-adc.c b/drivers/iio/adc/berlin2-adc.c
index 3d2e8b4db61a..a4e7c7eff5ac 100644
--- a/drivers/iio/adc/berlin2-adc.c
+++ b/drivers/iio/adc/berlin2-adc.c
@@ -298,8 +298,10 @@ static int berlin2_adc_probe(struct platform_device *pdev)
int ret;
indio_dev = devm_iio_device_alloc(&pdev->dev, sizeof(*priv));
- if (!indio_dev)
+ if (!indio_dev) {
+ of_node_put(parent_np);
return -ENOMEM;
+ }
priv = iio_priv(indio_dev);
diff --git a/drivers/iio/adc/imx8qxp-adc.c b/drivers/iio/adc/imx8qxp-adc.c
index 36777b827165..f5a0fc9e64c5 100644
--- a/drivers/iio/adc/imx8qxp-adc.c
+++ b/drivers/iio/adc/imx8qxp-adc.c
@@ -86,6 +86,8 @@
#define IMX8QXP_ADC_TIMEOUT msecs_to_jiffies(100)
+#define IMX8QXP_ADC_MAX_FIFO_SIZE 16
+
struct imx8qxp_adc {
struct device *dev;
void __iomem *regs;
@@ -95,6 +97,7 @@ struct imx8qxp_adc {
/* Serialise ADC channel reads */
struct mutex lock;
struct completion completion;
+ u32 fifo[IMX8QXP_ADC_MAX_FIFO_SIZE];
};
#define IMX8QXP_ADC_CHAN(_idx) { \
@@ -238,8 +241,7 @@ static int imx8qxp_adc_read_raw(struct iio_dev *indio_dev,
return ret;
}
- *val = FIELD_GET(IMX8QXP_ADC_RESFIFO_VAL_MASK,
- readl(adc->regs + IMX8QXP_ADR_ADC_RESFIFO));
+ *val = adc->fifo[0];
mutex_unlock(&adc->lock);
return IIO_VAL_INT;
@@ -265,10 +267,15 @@ static irqreturn_t imx8qxp_adc_isr(int irq, void *dev_id)
{
struct imx8qxp_adc *adc = dev_id;
u32 fifo_count;
+ int i;
fifo_count = FIELD_GET(IMX8QXP_ADC_FCTRL_FCOUNT_MASK,
readl(adc->regs + IMX8QXP_ADR_ADC_FCTRL));
+ for (i = 0; i < fifo_count; i++)
+ adc->fifo[i] = FIELD_GET(IMX8QXP_ADC_RESFIFO_VAL_MASK,
+ readl_relaxed(adc->regs + IMX8QXP_ADR_ADC_RESFIFO));
+
if (fifo_count)
complete(&adc->completion);
diff --git a/drivers/iio/adc/stm32-dfsdm-adc.c b/drivers/iio/adc/stm32-dfsdm-adc.c
index 6d21ea84fa82..a428bdb567d5 100644
--- a/drivers/iio/adc/stm32-dfsdm-adc.c
+++ b/drivers/iio/adc/stm32-dfsdm-adc.c
@@ -1520,6 +1520,7 @@ static const struct of_device_id stm32_dfsdm_adc_match[] = {
},
{}
};
+MODULE_DEVICE_TABLE(of, stm32_dfsdm_adc_match);
static int stm32_dfsdm_adc_probe(struct platform_device *pdev)
{
diff --git a/drivers/iio/adc/twl6030-gpadc.c b/drivers/iio/adc/twl6030-gpadc.c
index f53e8558b560..32873fb5f367 100644
--- a/drivers/iio/adc/twl6030-gpadc.c
+++ b/drivers/iio/adc/twl6030-gpadc.c
@@ -57,6 +57,18 @@
#define TWL6030_GPADCS BIT(1)
#define TWL6030_GPADCR BIT(0)
+#define USB_VBUS_CTRL_SET 0x04
+#define USB_ID_CTRL_SET 0x06
+
+#define TWL6030_MISC1 0xE4
+#define VBUS_MEAS 0x01
+#define ID_MEAS 0x01
+
+#define VAC_MEAS 0x04
+#define VBAT_MEAS 0x02
+#define BB_MEAS 0x01
+
+
/**
* struct twl6030_chnl_calib - channel calibration
* @gain: slope coefficient for ideal curve
@@ -927,6 +939,26 @@ static int twl6030_gpadc_probe(struct platform_device *pdev)
return ret;
}
+ ret = twl_i2c_write_u8(TWL_MODULE_USB, VBUS_MEAS, USB_VBUS_CTRL_SET);
+ if (ret < 0) {
+ dev_err(dev, "failed to wire up inputs\n");
+ return ret;
+ }
+
+ ret = twl_i2c_write_u8(TWL_MODULE_USB, ID_MEAS, USB_ID_CTRL_SET);
+ if (ret < 0) {
+ dev_err(dev, "failed to wire up inputs\n");
+ return ret;
+ }
+
+ ret = twl_i2c_write_u8(TWL6030_MODULE_ID0,
+ VBAT_MEAS | BB_MEAS | VAC_MEAS,
+ TWL6030_MISC1);
+ if (ret < 0) {
+ dev_err(dev, "failed to wire up inputs\n");
+ return ret;
+ }
+
indio_dev->name = DRIVER_NAME;
indio_dev->info = &twl6030_gpadc_iio_info;
indio_dev->modes = INDIO_DIRECT_MODE;
diff --git a/drivers/iio/adc/xilinx-ams.c b/drivers/iio/adc/xilinx-ams.c
index 5b4bdf3a26bb..a507d2e17079 100644
--- a/drivers/iio/adc/xilinx-ams.c
+++ b/drivers/iio/adc/xilinx-ams.c
@@ -1329,7 +1329,7 @@ static int ams_parse_firmware(struct iio_dev *indio_dev)
dev_channels = devm_krealloc(dev, ams_channels, dev_size, GFP_KERNEL);
if (!dev_channels)
- ret = -ENOMEM;
+ return -ENOMEM;
indio_dev->channels = dev_channels;
indio_dev->num_channels = num_channels;
diff --git a/drivers/iio/gyro/hid-sensor-gyro-3d.c b/drivers/iio/gyro/hid-sensor-gyro-3d.c
index 8f0ad022c7f1..698c50da1f10 100644
--- a/drivers/iio/gyro/hid-sensor-gyro-3d.c
+++ b/drivers/iio/gyro/hid-sensor-gyro-3d.c
@@ -231,6 +231,7 @@ static int gyro_3d_capture_sample(struct hid_sensor_hub_device *hsdev,
gyro_state->timestamp =
hid_sensor_convert_timestamp(&gyro_state->common_attributes,
*(s64 *)raw_data);
+ ret = 0;
break;
default:
break;
diff --git a/drivers/iio/imu/fxos8700_core.c b/drivers/iio/imu/fxos8700_core.c
index 423cfe526f2a..6d189c4b9ff9 100644
--- a/drivers/iio/imu/fxos8700_core.c
+++ b/drivers/iio/imu/fxos8700_core.c
@@ -10,6 +10,7 @@
#include <linux/regmap.h>
#include <linux/acpi.h>
#include <linux/bitops.h>
+#include <linux/bitfield.h>
#include <linux/iio/iio.h>
#include <linux/iio/sysfs.h>
@@ -144,9 +145,8 @@
#define FXOS8700_NVM_DATA_BNK0 0xa7
/* Bit definitions for FXOS8700_CTRL_REG1 */
-#define FXOS8700_CTRL_ODR_MSK 0x38
#define FXOS8700_CTRL_ODR_MAX 0x00
-#define FXOS8700_CTRL_ODR_MIN GENMASK(4, 3)
+#define FXOS8700_CTRL_ODR_MSK GENMASK(5, 3)
/* Bit definitions for FXOS8700_M_CTRL_REG1 */
#define FXOS8700_HMS_MASK GENMASK(1, 0)
@@ -320,7 +320,7 @@ static enum fxos8700_sensor fxos8700_to_sensor(enum iio_chan_type iio_type)
switch (iio_type) {
case IIO_ACCEL:
return FXOS8700_ACCEL;
- case IIO_ANGL_VEL:
+ case IIO_MAGN:
return FXOS8700_MAGN;
default:
return -EINVAL;
@@ -345,15 +345,35 @@ static int fxos8700_set_active_mode(struct fxos8700_data *data,
static int fxos8700_set_scale(struct fxos8700_data *data,
enum fxos8700_sensor t, int uscale)
{
- int i;
+ int i, ret, val;
+ bool active_mode;
static const int scale_num = ARRAY_SIZE(fxos8700_accel_scale);
struct device *dev = regmap_get_device(data->regmap);
if (t == FXOS8700_MAGN) {
- dev_err(dev, "Magnetometer scale is locked at 1200uT\n");
+ dev_err(dev, "Magnetometer scale is locked at 0.001Gs\n");
return -EINVAL;
}
+ /*
+ * When device is in active mode, it failed to set an ACCEL
+ * full-scale range(2g/4g/8g) in FXOS8700_XYZ_DATA_CFG.
+ * This is not align with the datasheet, but it is a fxos8700
+ * chip behavier. Set the device in standby mode before setting
+ * an ACCEL full-scale range.
+ */
+ ret = regmap_read(data->regmap, FXOS8700_CTRL_REG1, &val);
+ if (ret)
+ return ret;
+
+ active_mode = val & FXOS8700_ACTIVE;
+ if (active_mode) {
+ ret = regmap_write(data->regmap, FXOS8700_CTRL_REG1,
+ val & ~FXOS8700_ACTIVE);
+ if (ret)
+ return ret;
+ }
+
for (i = 0; i < scale_num; i++)
if (fxos8700_accel_scale[i].uscale == uscale)
break;
@@ -361,8 +381,12 @@ static int fxos8700_set_scale(struct fxos8700_data *data,
if (i == scale_num)
return -EINVAL;
- return regmap_write(data->regmap, FXOS8700_XYZ_DATA_CFG,
+ ret = regmap_write(data->regmap, FXOS8700_XYZ_DATA_CFG,
fxos8700_accel_scale[i].bits);
+ if (ret)
+ return ret;
+ return regmap_write(data->regmap, FXOS8700_CTRL_REG1,
+ active_mode);
}
static int fxos8700_get_scale(struct fxos8700_data *data,
@@ -372,7 +396,7 @@ static int fxos8700_get_scale(struct fxos8700_data *data,
static const int scale_num = ARRAY_SIZE(fxos8700_accel_scale);
if (t == FXOS8700_MAGN) {
- *uscale = 1200; /* Magnetometer is locked at 1200uT */
+ *uscale = 1000; /* Magnetometer is locked at 0.001Gs */
return 0;
}
@@ -394,22 +418,61 @@ static int fxos8700_get_data(struct fxos8700_data *data, int chan_type,
int axis, int *val)
{
u8 base, reg;
+ s16 tmp;
int ret;
- enum fxos8700_sensor type = fxos8700_to_sensor(chan_type);
- base = type ? FXOS8700_OUT_X_MSB : FXOS8700_M_OUT_X_MSB;
+ /*
+ * Different register base addresses varies with channel types.
+ * This bug hasn't been noticed before because using an enum is
+ * really hard to read. Use an a switch statement to take over that.
+ */
+ switch (chan_type) {
+ case IIO_ACCEL:
+ base = FXOS8700_OUT_X_MSB;
+ break;
+ case IIO_MAGN:
+ base = FXOS8700_M_OUT_X_MSB;
+ break;
+ default:
+ return -EINVAL;
+ }
/* Block read 6 bytes of device output registers to avoid data loss */
ret = regmap_bulk_read(data->regmap, base, data->buf,
- FXOS8700_DATA_BUF_SIZE);
+ sizeof(data->buf));
if (ret)
return ret;
/* Convert axis to buffer index */
reg = axis - IIO_MOD_X;
+ /*
+ * Convert to native endianness. The accel data and magn data
+ * are signed, so a forced type conversion is needed.
+ */
+ tmp = be16_to_cpu(data->buf[reg]);
+
+ /*
+ * ACCEL output data registers contain the X-axis, Y-axis, and Z-axis
+ * 14-bit left-justified sample data and MAGN output data registers
+ * contain the X-axis, Y-axis, and Z-axis 16-bit sample data. Apply
+ * a signed 2 bits right shift to the readback raw data from ACCEL
+ * output data register and keep that from MAGN sensor as the origin.
+ * Value should be extended to 32 bit.
+ */
+ switch (chan_type) {
+ case IIO_ACCEL:
+ tmp = tmp >> 2;
+ break;
+ case IIO_MAGN:
+ /* Nothing to do */
+ break;
+ default:
+ return -EINVAL;
+ }
+
/* Convert to native endianness */
- *val = sign_extend32(be16_to_cpu(data->buf[reg]), 15);
+ *val = sign_extend32(tmp, 15);
return 0;
}
@@ -445,10 +508,9 @@ static int fxos8700_set_odr(struct fxos8700_data *data, enum fxos8700_sensor t,
if (i >= odr_num)
return -EINVAL;
- return regmap_update_bits(data->regmap,
- FXOS8700_CTRL_REG1,
- FXOS8700_CTRL_ODR_MSK + FXOS8700_ACTIVE,
- fxos8700_odr[i].bits << 3 | active_mode);
+ val &= ~FXOS8700_CTRL_ODR_MSK;
+ val |= FIELD_PREP(FXOS8700_CTRL_ODR_MSK, fxos8700_odr[i].bits) | FXOS8700_ACTIVE;
+ return regmap_write(data->regmap, FXOS8700_CTRL_REG1, val);
}
static int fxos8700_get_odr(struct fxos8700_data *data, enum fxos8700_sensor t,
@@ -461,7 +523,7 @@ static int fxos8700_get_odr(struct fxos8700_data *data, enum fxos8700_sensor t,
if (ret)
return ret;
- val &= FXOS8700_CTRL_ODR_MSK;
+ val = FIELD_GET(FXOS8700_CTRL_ODR_MSK, val);
for (i = 0; i < odr_num; i++)
if (val == fxos8700_odr[i].bits)
@@ -526,7 +588,7 @@ static IIO_CONST_ATTR(in_accel_sampling_frequency_available,
static IIO_CONST_ATTR(in_magn_sampling_frequency_available,
"1.5625 6.25 12.5 50 100 200 400 800");
static IIO_CONST_ATTR(in_accel_scale_available, "0.000244 0.000488 0.000976");
-static IIO_CONST_ATTR(in_magn_scale_available, "0.000001200");
+static IIO_CONST_ATTR(in_magn_scale_available, "0.001000");
static struct attribute *fxos8700_attrs[] = {
&iio_const_attr_in_accel_sampling_frequency_available.dev_attr.attr,
@@ -592,14 +654,19 @@ static int fxos8700_chip_init(struct fxos8700_data *data, bool use_spi)
if (ret)
return ret;
- /* Max ODR (800Hz individual or 400Hz hybrid), active mode */
- ret = regmap_write(data->regmap, FXOS8700_CTRL_REG1,
- FXOS8700_CTRL_ODR_MAX | FXOS8700_ACTIVE);
+ /*
+ * Set max full-scale range (+/-8G) for ACCEL sensor in chip
+ * initialization then activate the device.
+ */
+ ret = regmap_write(data->regmap, FXOS8700_XYZ_DATA_CFG, MODE_8G);
if (ret)
return ret;
- /* Set for max full-scale range (+/-8G) */
- return regmap_write(data->regmap, FXOS8700_XYZ_DATA_CFG, MODE_8G);
+ /* Max ODR (800Hz individual or 400Hz hybrid), active mode */
+ return regmap_update_bits(data->regmap, FXOS8700_CTRL_REG1,
+ FXOS8700_CTRL_ODR_MSK | FXOS8700_ACTIVE,
+ FIELD_PREP(FXOS8700_CTRL_ODR_MSK, FXOS8700_CTRL_ODR_MAX) |
+ FXOS8700_ACTIVE);
}
static void fxos8700_chip_uninit(void *data)
diff --git a/drivers/iio/imu/st_lsm6dsx/Kconfig b/drivers/iio/imu/st_lsm6dsx/Kconfig
index f6660847fb58..8c16cdacf2f2 100644
--- a/drivers/iio/imu/st_lsm6dsx/Kconfig
+++ b/drivers/iio/imu/st_lsm6dsx/Kconfig
@@ -4,6 +4,7 @@ config IIO_ST_LSM6DSX
tristate "ST_LSM6DSx driver for STM 6-axis IMU MEMS sensors"
depends on (I2C || SPI || I3C)
select IIO_BUFFER
+ select IIO_TRIGGERED_BUFFER
select IIO_KFIFO_BUF
select IIO_ST_LSM6DSX_I2C if (I2C)
select IIO_ST_LSM6DSX_SPI if (SPI_MASTER)
diff --git a/drivers/iio/light/cm32181.c b/drivers/iio/light/cm32181.c
index 001055d09750..b1674a5bfa36 100644
--- a/drivers/iio/light/cm32181.c
+++ b/drivers/iio/light/cm32181.c
@@ -440,6 +440,8 @@ static int cm32181_probe(struct i2c_client *client)
if (!indio_dev)
return -ENOMEM;
+ i2c_set_clientdata(client, indio_dev);
+
/*
* Some ACPI systems list 2 I2C resources for the CM3218 sensor, the
* SMBus Alert Response Address (ARA, 0x0c) and the actual I2C address.
@@ -460,8 +462,6 @@ static int cm32181_probe(struct i2c_client *client)
return PTR_ERR(client);
}
- i2c_set_clientdata(client, indio_dev);
-
cm32181 = iio_priv(indio_dev);
cm32181->client = client;
cm32181->dev = dev;
@@ -490,7 +490,8 @@ static int cm32181_probe(struct i2c_client *client)
static int cm32181_suspend(struct device *dev)
{
- struct i2c_client *client = to_i2c_client(dev);
+ struct cm32181_chip *cm32181 = iio_priv(dev_get_drvdata(dev));
+ struct i2c_client *client = cm32181->client;
return i2c_smbus_write_word_data(client, CM32181_REG_ADDR_CMD,
CM32181_CMD_ALS_DISABLE);
@@ -498,8 +499,8 @@ static int cm32181_suspend(struct device *dev)
static int cm32181_resume(struct device *dev)
{
- struct i2c_client *client = to_i2c_client(dev);
struct cm32181_chip *cm32181 = iio_priv(dev_get_drvdata(dev));
+ struct i2c_client *client = cm32181->client;
return i2c_smbus_write_word_data(client, CM32181_REG_ADDR_CMD,
cm32181->conf_regs[CM32181_REG_ADDR_CMD]);
diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c
index 43b26bc12288..39357dc2d229 100644
--- a/drivers/infiniband/core/umem_dmabuf.c
+++ b/drivers/infiniband/core/umem_dmabuf.c
@@ -26,8 +26,8 @@ int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf)
if (umem_dmabuf->sgt)
goto wait_fence;
- sgt = dma_buf_map_attachment_unlocked(umem_dmabuf->attach,
- DMA_BIDIRECTIONAL);
+ sgt = dma_buf_map_attachment(umem_dmabuf->attach,
+ DMA_BIDIRECTIONAL);
if (IS_ERR(sgt))
return PTR_ERR(sgt);
@@ -103,8 +103,8 @@ void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf)
umem_dmabuf->last_sg_trim = 0;
}
- dma_buf_unmap_attachment_unlocked(umem_dmabuf->attach, umem_dmabuf->sgt,
- DMA_BIDIRECTIONAL);
+ dma_buf_unmap_attachment(umem_dmabuf->attach, umem_dmabuf->sgt,
+ DMA_BIDIRECTIONAL);
umem_dmabuf->sgt = NULL;
}
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 26b021f43ba4..11b1c1603aeb 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -2957,15 +2957,18 @@ EXPORT_SYMBOL(__rdma_block_iter_start);
bool __rdma_block_iter_next(struct ib_block_iter *biter)
{
unsigned int block_offset;
+ unsigned int sg_delta;
if (!biter->__sg_nents || !biter->__sg)
return false;
biter->__dma_addr = sg_dma_address(biter->__sg) + biter->__sg_advance;
block_offset = biter->__dma_addr & (BIT_ULL(biter->__pg_bit) - 1);
- biter->__sg_advance += BIT_ULL(biter->__pg_bit) - block_offset;
+ sg_delta = BIT_ULL(biter->__pg_bit) - block_offset;
- if (biter->__sg_advance >= sg_dma_len(biter->__sg)) {
+ if (sg_dma_len(biter->__sg) - biter->__sg_advance > sg_delta) {
+ biter->__sg_advance += sg_delta;
+ } else {
biter->__sg_advance = 0;
biter->__sg = sg_next(biter->__sg);
biter->__sg_nents--;
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index f5f9269fdc16..7c5d487ec916 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -1318,12 +1318,15 @@ static int user_exp_rcv_setup(struct hfi1_filedata *fd, unsigned long arg,
addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
sizeof(tinfo.tidcnt)))
- return -EFAULT;
+ ret = -EFAULT;
addr = arg + offsetof(struct hfi1_tid_info, length);
- if (copy_to_user((void __user *)addr, &tinfo.length,
+ if (!ret && copy_to_user((void __user *)addr, &tinfo.length,
sizeof(tinfo.length)))
ret = -EFAULT;
+
+ if (ret)
+ hfi1_user_exp_rcv_invalid(fd, &tinfo);
}
return ret;
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index 186d30291260..350884d5f089 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -23,18 +23,25 @@ static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
const struct mmu_notifier_range *range,
unsigned long cur_seq);
+static bool tid_cover_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq);
static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *,
struct tid_group *grp,
unsigned int start, u16 count,
u32 *tidlist, unsigned int *tididx,
unsigned int *pmapped);
-static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
- struct tid_group **grp);
+static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo);
+static void __clear_tid_node(struct hfi1_filedata *fd,
+ struct tid_rb_node *node);
static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node);
static const struct mmu_interval_notifier_ops tid_mn_ops = {
.invalidate = tid_rb_invalidate,
};
+static const struct mmu_interval_notifier_ops tid_cover_ops = {
+ .invalidate = tid_cover_invalidate,
+};
/*
* Initialize context and file private data needed for Expected
@@ -153,16 +160,11 @@ static void unpin_rcv_pages(struct hfi1_filedata *fd,
static int pin_rcv_pages(struct hfi1_filedata *fd, struct tid_user_buf *tidbuf)
{
int pinned;
- unsigned int npages;
+ unsigned int npages = tidbuf->npages;
unsigned long vaddr = tidbuf->vaddr;
struct page **pages = NULL;
struct hfi1_devdata *dd = fd->uctxt->dd;
- /* Get the number of pages the user buffer spans */
- npages = num_user_pages(vaddr, tidbuf->length);
- if (!npages)
- return -EINVAL;
-
if (npages > fd->uctxt->expected_count) {
dd_dev_err(dd, "Expected buffer too big\n");
return -EINVAL;
@@ -189,7 +191,6 @@ static int pin_rcv_pages(struct hfi1_filedata *fd, struct tid_user_buf *tidbuf)
return pinned;
}
tidbuf->pages = pages;
- tidbuf->npages = npages;
fd->tid_n_pinned += pinned;
return pinned;
}
@@ -253,53 +254,66 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
tididx = 0, mapped, mapped_pages = 0;
u32 *tidlist = NULL;
struct tid_user_buf *tidbuf;
+ unsigned long mmu_seq = 0;
if (!PAGE_ALIGNED(tinfo->vaddr))
return -EINVAL;
+ if (tinfo->length == 0)
+ return -EINVAL;
tidbuf = kzalloc(sizeof(*tidbuf), GFP_KERNEL);
if (!tidbuf)
return -ENOMEM;
+ mutex_init(&tidbuf->cover_mutex);
tidbuf->vaddr = tinfo->vaddr;
tidbuf->length = tinfo->length;
+ tidbuf->npages = num_user_pages(tidbuf->vaddr, tidbuf->length);
tidbuf->psets = kcalloc(uctxt->expected_count, sizeof(*tidbuf->psets),
GFP_KERNEL);
if (!tidbuf->psets) {
- kfree(tidbuf);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto fail_release_mem;
+ }
+
+ if (fd->use_mn) {
+ ret = mmu_interval_notifier_insert(
+ &tidbuf->notifier, current->mm,
+ tidbuf->vaddr, tidbuf->npages * PAGE_SIZE,
+ &tid_cover_ops);
+ if (ret)
+ goto fail_release_mem;
+ mmu_seq = mmu_interval_read_begin(&tidbuf->notifier);
}
pinned = pin_rcv_pages(fd, tidbuf);
if (pinned <= 0) {
- kfree(tidbuf->psets);
- kfree(tidbuf);
- return pinned;
+ ret = (pinned < 0) ? pinned : -ENOSPC;
+ goto fail_unpin;
}
/* Find sets of physically contiguous pages */
tidbuf->n_psets = find_phys_blocks(tidbuf, pinned);
- /*
- * We don't need to access this under a lock since tid_used is per
- * process and the same process cannot be in hfi1_user_exp_rcv_clear()
- * and hfi1_user_exp_rcv_setup() at the same time.
- */
+ /* Reserve the number of expected tids to be used. */
spin_lock(&fd->tid_lock);
if (fd->tid_used + tidbuf->n_psets > fd->tid_limit)
pageset_count = fd->tid_limit - fd->tid_used;
else
pageset_count = tidbuf->n_psets;
+ fd->tid_used += pageset_count;
spin_unlock(&fd->tid_lock);
- if (!pageset_count)
- goto bail;
+ if (!pageset_count) {
+ ret = -ENOSPC;
+ goto fail_unreserve;
+ }
ngroups = pageset_count / dd->rcv_entries.group_size;
tidlist = kcalloc(pageset_count, sizeof(*tidlist), GFP_KERNEL);
if (!tidlist) {
ret = -ENOMEM;
- goto nomem;
+ goto fail_unreserve;
}
tididx = 0;
@@ -395,43 +409,78 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
}
unlock:
mutex_unlock(&uctxt->exp_mutex);
-nomem:
hfi1_cdbg(TID, "total mapped: tidpairs:%u pages:%u (%d)", tididx,
mapped_pages, ret);
- if (tididx) {
- spin_lock(&fd->tid_lock);
- fd->tid_used += tididx;
- spin_unlock(&fd->tid_lock);
- tinfo->tidcnt = tididx;
- tinfo->length = mapped_pages * PAGE_SIZE;
-
- if (copy_to_user(u64_to_user_ptr(tinfo->tidlist),
- tidlist, sizeof(tidlist[0]) * tididx)) {
- /*
- * On failure to copy to the user level, we need to undo
- * everything done so far so we don't leak resources.
- */
- tinfo->tidlist = (unsigned long)&tidlist;
- hfi1_user_exp_rcv_clear(fd, tinfo);
- tinfo->tidlist = 0;
- ret = -EFAULT;
- goto bail;
+
+ /* fail if nothing was programmed, set error if none provided */
+ if (tididx == 0) {
+ if (ret >= 0)
+ ret = -ENOSPC;
+ goto fail_unreserve;
+ }
+
+ /* adjust reserved tid_used to actual count */
+ spin_lock(&fd->tid_lock);
+ fd->tid_used -= pageset_count - tididx;
+ spin_unlock(&fd->tid_lock);
+
+ /* unpin all pages not covered by a TID */
+ unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages, pinned - mapped_pages,
+ false);
+
+ if (fd->use_mn) {
+ /* check for an invalidate during setup */
+ bool fail = false;
+
+ mutex_lock(&tidbuf->cover_mutex);
+ fail = mmu_interval_read_retry(&tidbuf->notifier, mmu_seq);
+ mutex_unlock(&tidbuf->cover_mutex);
+
+ if (fail) {
+ ret = -EBUSY;
+ goto fail_unprogram;
}
}
- /*
- * If not everything was mapped (due to insufficient RcvArray entries,
- * for example), unpin all unmapped pages so we can pin them nex time.
- */
- if (mapped_pages != pinned)
- unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages,
- (pinned - mapped_pages), false);
-bail:
+ tinfo->tidcnt = tididx;
+ tinfo->length = mapped_pages * PAGE_SIZE;
+
+ if (copy_to_user(u64_to_user_ptr(tinfo->tidlist),
+ tidlist, sizeof(tidlist[0]) * tididx)) {
+ ret = -EFAULT;
+ goto fail_unprogram;
+ }
+
+ if (fd->use_mn)
+ mmu_interval_notifier_remove(&tidbuf->notifier);
+ kfree(tidbuf->pages);
kfree(tidbuf->psets);
+ kfree(tidbuf);
kfree(tidlist);
+ return 0;
+
+fail_unprogram:
+ /* unprogram, unmap, and unpin all allocated TIDs */
+ tinfo->tidlist = (unsigned long)tidlist;
+ hfi1_user_exp_rcv_clear(fd, tinfo);
+ tinfo->tidlist = 0;
+ pinned = 0; /* nothing left to unpin */
+ pageset_count = 0; /* nothing left reserved */
+fail_unreserve:
+ spin_lock(&fd->tid_lock);
+ fd->tid_used -= pageset_count;
+ spin_unlock(&fd->tid_lock);
+fail_unpin:
+ if (fd->use_mn)
+ mmu_interval_notifier_remove(&tidbuf->notifier);
+ if (pinned > 0)
+ unpin_rcv_pages(fd, tidbuf, NULL, 0, pinned, false);
+fail_release_mem:
kfree(tidbuf->pages);
+ kfree(tidbuf->psets);
kfree(tidbuf);
- return ret > 0 ? 0 : ret;
+ kfree(tidlist);
+ return ret;
}
int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
@@ -452,7 +501,7 @@ int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
mutex_lock(&uctxt->exp_mutex);
for (tididx = 0; tididx < tinfo->tidcnt; tididx++) {
- ret = unprogram_rcvarray(fd, tidinfo[tididx], NULL);
+ ret = unprogram_rcvarray(fd, tidinfo[tididx]);
if (ret) {
hfi1_cdbg(TID, "Failed to unprogram rcv array %d",
ret);
@@ -706,6 +755,7 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd,
}
node->fdata = fd;
+ mutex_init(&node->invalidate_mutex);
node->phys = page_to_phys(pages[0]);
node->npages = npages;
node->rcventry = rcventry;
@@ -721,11 +771,6 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd,
&tid_mn_ops);
if (ret)
goto out_unmap;
- /*
- * FIXME: This is in the wrong order, the notifier should be
- * established before the pages are pinned by pin_rcv_pages.
- */
- mmu_interval_read_begin(&node->notifier);
}
fd->entry_to_rb[node->rcventry - uctxt->expected_base] = node;
@@ -745,8 +790,7 @@ out_unmap:
return -EFAULT;
}
-static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
- struct tid_group **grp)
+static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo)
{
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
@@ -769,9 +813,6 @@ static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
if (!node || node->rcventry != (uctxt->expected_base + rcventry))
return -EBADF;
- if (grp)
- *grp = node->grp;
-
if (fd->use_mn)
mmu_interval_notifier_remove(&node->notifier);
cacheless_tid_rb_remove(fd, node);
@@ -779,23 +820,34 @@ static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
return 0;
}
-static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
+static void __clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
{
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
+ mutex_lock(&node->invalidate_mutex);
+ if (node->freed)
+ goto done;
+ node->freed = true;
+
trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry,
node->npages,
node->notifier.interval_tree.start, node->phys,
node->dma_addr);
- /*
- * Make sure device has seen the write before we unpin the
- * pages.
- */
+ /* Make sure device has seen the write before pages are unpinned */
hfi1_put_tid(dd, node->rcventry, PT_INVALID_FLUSH, 0, 0);
unpin_rcv_pages(fd, NULL, node, 0, node->npages, true);
+done:
+ mutex_unlock(&node->invalidate_mutex);
+}
+
+static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
+{
+ struct hfi1_ctxtdata *uctxt = fd->uctxt;
+
+ __clear_tid_node(fd, node);
node->grp->used--;
node->grp->map &= ~(1 << (node->rcventry - node->grp->base));
@@ -854,10 +906,16 @@ static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
if (node->freed)
return true;
+ /* take action only if unmapping */
+ if (range->event != MMU_NOTIFY_UNMAP)
+ return true;
+
trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt,
node->notifier.interval_tree.start,
node->rcventry, node->npages, node->dma_addr);
- node->freed = true;
+
+ /* clear the hardware rcvarray entry */
+ __clear_tid_node(fdata, node);
spin_lock(&fdata->invalid_lock);
if (fdata->invalid_tid_idx < uctxt->expected_count) {
@@ -887,6 +945,23 @@ static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
return true;
}
+static bool tid_cover_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
+{
+ struct tid_user_buf *tidbuf =
+ container_of(mni, struct tid_user_buf, notifier);
+
+ /* take action only if unmapping */
+ if (range->event == MMU_NOTIFY_UNMAP) {
+ mutex_lock(&tidbuf->cover_mutex);
+ mmu_interval_set_seq(mni, cur_seq);
+ mutex_unlock(&tidbuf->cover_mutex);
+ }
+
+ return true;
+}
+
static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
struct tid_rb_node *tnode)
{
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
index 8c53e416bf84..f8ee997d0050 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
@@ -16,6 +16,8 @@ struct tid_pageset {
};
struct tid_user_buf {
+ struct mmu_interval_notifier notifier;
+ struct mutex cover_mutex;
unsigned long vaddr;
unsigned long length;
unsigned int npages;
@@ -27,6 +29,7 @@ struct tid_user_buf {
struct tid_rb_node {
struct mmu_interval_notifier notifier;
struct hfi1_filedata *fdata;
+ struct mutex invalidate_mutex; /* covers hw removal */
unsigned long phys;
struct tid_group *grp;
u32 rcventry;
diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c
index 7b086fe63a24..195aa9ea18b6 100644
--- a/drivers/infiniband/hw/irdma/cm.c
+++ b/drivers/infiniband/hw/irdma/cm.c
@@ -1722,6 +1722,9 @@ static int irdma_add_mqh_4(struct irdma_device *iwdev,
continue;
idev = in_dev_get(ip_dev);
+ if (!idev)
+ continue;
+
in_dev_for_each_ifa_rtnl(ifa, idev) {
ibdev_dbg(&iwdev->ibdev,
"CM: Allocating child CM Listener forIP=%pI4, vlan_id=%d, MAC=%pM\n",
diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
index ea15ec77e321..54b61930a7fd 100644
--- a/drivers/infiniband/hw/mana/qp.c
+++ b/drivers/infiniband/hw/mana/qp.c
@@ -289,7 +289,7 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
/* IB ports start with 1, MANA Ethernet ports start with 0 */
port = ucmd.port;
- if (ucmd.port > mc->num_ports)
+ if (port < 1 || port > mc->num_ports)
return -EINVAL;
if (attr->cap.max_send_wr > MAX_SEND_BUFFERS_PER_QUEUE) {
diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c
index 945758f39523..3e1272695d99 100644
--- a/drivers/infiniband/hw/mlx5/counters.c
+++ b/drivers/infiniband/hw/mlx5/counters.c
@@ -278,7 +278,6 @@ static int do_get_hw_stats(struct ib_device *ibdev,
const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1);
struct mlx5_core_dev *mdev;
int ret, num_counters;
- u32 mdev_port_num;
if (!stats)
return -EINVAL;
@@ -299,8 +298,9 @@ static int do_get_hw_stats(struct ib_device *ibdev,
}
if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
- mdev = mlx5_ib_get_native_port_mdev(dev, port_num,
- &mdev_port_num);
+ if (!port_num)
+ port_num = 1;
+ mdev = mlx5_ib_get_native_port_mdev(dev, port_num, NULL);
if (!mdev) {
/* If port is not affiliated yet, its in down state
* which doesn't have any counters yet, so it would be
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 40d9410ec303..cf953d23d18d 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -4502,6 +4502,40 @@ static bool mlx5_ib_modify_qp_allowed(struct mlx5_ib_dev *dev,
return false;
}
+static int validate_rd_atomic(struct mlx5_ib_dev *dev, struct ib_qp_attr *attr,
+ int attr_mask, enum ib_qp_type qp_type)
+{
+ int log_max_ra_res;
+ int log_max_ra_req;
+
+ if (qp_type == MLX5_IB_QPT_DCI) {
+ log_max_ra_res = 1 << MLX5_CAP_GEN(dev->mdev,
+ log_max_ra_res_dc);
+ log_max_ra_req = 1 << MLX5_CAP_GEN(dev->mdev,
+ log_max_ra_req_dc);
+ } else {
+ log_max_ra_res = 1 << MLX5_CAP_GEN(dev->mdev,
+ log_max_ra_res_qp);
+ log_max_ra_req = 1 << MLX5_CAP_GEN(dev->mdev,
+ log_max_ra_req_qp);
+ }
+
+ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
+ attr->max_rd_atomic > log_max_ra_res) {
+ mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n",
+ attr->max_rd_atomic);
+ return false;
+ }
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
+ attr->max_dest_rd_atomic > log_max_ra_req) {
+ mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n",
+ attr->max_dest_rd_atomic);
+ return false;
+ }
+ return true;
+}
+
int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
@@ -4589,21 +4623,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto out;
}
- if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
- attr->max_rd_atomic >
- (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp))) {
- mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n",
- attr->max_rd_atomic);
- goto out;
- }
-
- if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
- attr->max_dest_rd_atomic >
- (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp))) {
- mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n",
- attr->max_dest_rd_atomic);
+ if (!validate_rd_atomic(dev, attr, attr_mask, qp_type))
goto out;
- }
if (cur_state == new_state && cur_state == IB_QPS_RESET) {
err = 0;
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index c301b3be9f30..a2857accc427 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -276,8 +276,8 @@ iter_chunk:
size = pa_end - pa_start + PAGE_SIZE;
usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x",
va_start, &pa_start, size, flags);
- err = iommu_map(pd->domain, va_start, pa_start,
- size, flags);
+ err = iommu_map_atomic(pd->domain, va_start,
+ pa_start, size, flags);
if (err) {
usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n",
va_start, &pa_start, size, err);
@@ -293,8 +293,8 @@ iter_chunk:
size = pa - pa_start + PAGE_SIZE;
usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x\n",
va_start, &pa_start, size, flags);
- err = iommu_map(pd->domain, va_start, pa_start,
- size, flags);
+ err = iommu_map_atomic(pd->domain, va_start,
+ pa_start, size, flags);
if (err) {
usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n",
va_start, &pa_start, size, err);
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index a754fc902e3d..7b41d79e72b2 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -98,11 +98,11 @@ enum rxe_device_param {
RXE_MAX_SRQ = DEFAULT_MAX_VALUE - RXE_MIN_SRQ_INDEX,
RXE_MIN_MR_INDEX = 0x00000001,
- RXE_MAX_MR_INDEX = DEFAULT_MAX_VALUE,
- RXE_MAX_MR = DEFAULT_MAX_VALUE - RXE_MIN_MR_INDEX,
- RXE_MIN_MW_INDEX = 0x00010001,
- RXE_MAX_MW_INDEX = 0x00020000,
- RXE_MAX_MW = 0x00001000,
+ RXE_MAX_MR_INDEX = DEFAULT_MAX_VALUE >> 1,
+ RXE_MAX_MR = RXE_MAX_MR_INDEX - RXE_MIN_MR_INDEX,
+ RXE_MIN_MW_INDEX = RXE_MAX_MR_INDEX + 1,
+ RXE_MAX_MW_INDEX = DEFAULT_MAX_VALUE,
+ RXE_MAX_MW = RXE_MAX_MW_INDEX - RXE_MIN_MW_INDEX,
RXE_MAX_PKT_PER_ACK = 64,
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index f50620f5a0a1..1151c0b5ccea 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -23,16 +23,16 @@ static const struct rxe_type_info {
.size = sizeof(struct rxe_ucontext),
.elem_offset = offsetof(struct rxe_ucontext, elem),
.min_index = 1,
- .max_index = UINT_MAX,
- .max_elem = UINT_MAX,
+ .max_index = RXE_MAX_UCONTEXT,
+ .max_elem = RXE_MAX_UCONTEXT,
},
[RXE_TYPE_PD] = {
.name = "pd",
.size = sizeof(struct rxe_pd),
.elem_offset = offsetof(struct rxe_pd, elem),
.min_index = 1,
- .max_index = UINT_MAX,
- .max_elem = UINT_MAX,
+ .max_index = RXE_MAX_PD,
+ .max_elem = RXE_MAX_PD,
},
[RXE_TYPE_AH] = {
.name = "ah",
@@ -40,7 +40,7 @@ static const struct rxe_type_info {
.elem_offset = offsetof(struct rxe_ah, elem),
.min_index = RXE_MIN_AH_INDEX,
.max_index = RXE_MAX_AH_INDEX,
- .max_elem = RXE_MAX_AH_INDEX - RXE_MIN_AH_INDEX + 1,
+ .max_elem = RXE_MAX_AH,
},
[RXE_TYPE_SRQ] = {
.name = "srq",
@@ -49,7 +49,7 @@ static const struct rxe_type_info {
.cleanup = rxe_srq_cleanup,
.min_index = RXE_MIN_SRQ_INDEX,
.max_index = RXE_MAX_SRQ_INDEX,
- .max_elem = RXE_MAX_SRQ_INDEX - RXE_MIN_SRQ_INDEX + 1,
+ .max_elem = RXE_MAX_SRQ,
},
[RXE_TYPE_QP] = {
.name = "qp",
@@ -58,7 +58,7 @@ static const struct rxe_type_info {
.cleanup = rxe_qp_cleanup,
.min_index = RXE_MIN_QP_INDEX,
.max_index = RXE_MAX_QP_INDEX,
- .max_elem = RXE_MAX_QP_INDEX - RXE_MIN_QP_INDEX + 1,
+ .max_elem = RXE_MAX_QP,
},
[RXE_TYPE_CQ] = {
.name = "cq",
@@ -66,8 +66,8 @@ static const struct rxe_type_info {
.elem_offset = offsetof(struct rxe_cq, elem),
.cleanup = rxe_cq_cleanup,
.min_index = 1,
- .max_index = UINT_MAX,
- .max_elem = UINT_MAX,
+ .max_index = RXE_MAX_CQ,
+ .max_elem = RXE_MAX_CQ,
},
[RXE_TYPE_MR] = {
.name = "mr",
@@ -76,7 +76,7 @@ static const struct rxe_type_info {
.cleanup = rxe_mr_cleanup,
.min_index = RXE_MIN_MR_INDEX,
.max_index = RXE_MAX_MR_INDEX,
- .max_elem = RXE_MAX_MR_INDEX - RXE_MIN_MR_INDEX + 1,
+ .max_elem = RXE_MAX_MR,
},
[RXE_TYPE_MW] = {
.name = "mw",
@@ -85,7 +85,7 @@ static const struct rxe_type_info {
.cleanup = rxe_mw_cleanup,
.min_index = RXE_MIN_MW_INDEX,
.max_index = RXE_MAX_MW_INDEX,
- .max_elem = RXE_MAX_MW_INDEX - RXE_MIN_MW_INDEX + 1,
+ .max_elem = RXE_MAX_MW,
},
};
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index ac25fc80fb33..f10d4bcf87d2 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -2200,6 +2200,14 @@ int ipoib_intf_init(struct ib_device *hca, u32 port, const char *name,
rn->attach_mcast = ipoib_mcast_attach;
rn->detach_mcast = ipoib_mcast_detach;
rn->hca = hca;
+
+ rc = netif_set_real_num_tx_queues(dev, 1);
+ if (rc)
+ goto out;
+
+ rc = netif_set_real_num_rx_queues(dev, 1);
+ if (rc)
+ goto out;
}
priv->rn_ops = dev->netdev_ops;
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
index c76ba29da1e2..5adba0f754b6 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
@@ -312,9 +312,8 @@ void rtrs_srv_destroy_path_files(struct rtrs_srv_path *srv_path)
if (srv_path->kobj.state_in_sysfs) {
sysfs_remove_group(&srv_path->kobj, &rtrs_srv_path_attr_group);
- kobject_del(&srv_path->kobj);
kobject_put(&srv_path->kobj);
+ rtrs_srv_destroy_once_sysfs_root_folders(srv_path);
}
- rtrs_srv_destroy_once_sysfs_root_folders(srv_path);
}
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index 00b0068fda20..5d94db453df3 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -62,9 +62,6 @@ enum {
SRP_DEFAULT_CMD_SQ_SIZE = SRP_DEFAULT_QUEUE_SIZE - SRP_RSP_SQ_SIZE -
SRP_TSK_MGMT_SQ_SIZE,
- SRP_TAG_NO_REQ = ~0U,
- SRP_TAG_TSK_MGMT = 1U << 31,
-
SRP_MAX_PAGES_PER_MR = 512,
SRP_MAX_ADD_CDB_LEN = 16,
@@ -79,6 +76,11 @@ enum {
sizeof(struct srp_imm_buf),
};
+enum {
+ SRP_TAG_NO_REQ = ~0U,
+ SRP_TAG_TSK_MGMT = BIT(31),
+};
+
enum srp_target_state {
SRP_TARGET_SCANNING,
SRP_TARGET_LIVE,
diff --git a/drivers/input/misc/xen-kbdfront.c b/drivers/input/misc/xen-kbdfront.c
index 8d8ebdc2039b..67f1c7364c95 100644
--- a/drivers/input/misc/xen-kbdfront.c
+++ b/drivers/input/misc/xen-kbdfront.c
@@ -51,7 +51,7 @@ module_param_array(ptr_size, int, NULL, 0444);
MODULE_PARM_DESC(ptr_size,
"Pointing device width, height in pixels (default 800,600)");
-static int xenkbd_remove(struct xenbus_device *);
+static void xenkbd_remove(struct xenbus_device *);
static int xenkbd_connect_backend(struct xenbus_device *, struct xenkbd_info *);
static void xenkbd_disconnect_backend(struct xenkbd_info *);
@@ -404,7 +404,7 @@ static int xenkbd_resume(struct xenbus_device *dev)
return xenkbd_connect_backend(dev, info);
}
-static int xenkbd_remove(struct xenbus_device *dev)
+static void xenkbd_remove(struct xenbus_device *dev)
{
struct xenkbd_info *info = dev_get_drvdata(&dev->dev);
@@ -417,7 +417,6 @@ static int xenkbd_remove(struct xenbus_device *dev)
input_unregister_device(info->mtouch);
free_page((unsigned long)info->page);
kfree(info);
- return 0;
}
static int xenkbd_connect_backend(struct xenbus_device *dev,
diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c
index b0f776448a1c..fa021af8506e 100644
--- a/drivers/input/mouse/synaptics.c
+++ b/drivers/input/mouse/synaptics.c
@@ -192,7 +192,6 @@ static const char * const smbus_pnp_ids[] = {
"SYN3221", /* HP 15-ay000 */
"SYN323d", /* HP Spectre X360 13-w013dx */
"SYN3257", /* HP Envy 13-ad105ng */
- "SYN3286", /* HP Laptop 15-da3001TU */
NULL
};
diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h
index 46f8a694291e..efc61736099b 100644
--- a/drivers/input/serio/i8042-acpipnpio.h
+++ b/drivers/input/serio/i8042-acpipnpio.h
@@ -1240,6 +1240,13 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
},
{
.matches = {
+ DMI_MATCH(DMI_BOARD_NAME, "PCX0DX"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
+ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+ .matches = {
DMI_MATCH(DMI_BOARD_NAME, "X170SM"),
},
.driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
diff --git a/drivers/interconnect/qcom/icc-rpm.c b/drivers/interconnect/qcom/icc-rpm.c
index ba6781f54ab7..df3196f72536 100644
--- a/drivers/interconnect/qcom/icc-rpm.c
+++ b/drivers/interconnect/qcom/icc-rpm.c
@@ -488,7 +488,7 @@ int qnoc_probe(struct platform_device *pdev)
}
regmap_done:
- ret = devm_clk_bulk_get(dev, qp->num_clks, qp->bus_clks);
+ ret = devm_clk_bulk_get_optional(dev, qp->num_clks, qp->bus_clks);
if (ret)
return ret;
diff --git a/drivers/interconnect/qcom/msm8996.c b/drivers/interconnect/qcom/msm8996.c
index c2903ae3b3bc..25a1a32bc611 100644
--- a/drivers/interconnect/qcom/msm8996.c
+++ b/drivers/interconnect/qcom/msm8996.c
@@ -33,6 +33,13 @@ static const char * const bus_a0noc_clocks[] = {
"aggre0_noc_mpu_cfg"
};
+static const char * const bus_a2noc_clocks[] = {
+ "bus",
+ "bus_a",
+ "aggre2_ufs_axi",
+ "ufs_axi"
+};
+
static const u16 mas_a0noc_common_links[] = {
MSM8996_SLAVE_A0NOC_SNOC
};
@@ -1806,7 +1813,7 @@ static const struct regmap_config msm8996_a0noc_regmap_config = {
.reg_bits = 32,
.reg_stride = 4,
.val_bits = 32,
- .max_register = 0x9000,
+ .max_register = 0x6000,
.fast_io = true
};
@@ -1830,7 +1837,7 @@ static const struct regmap_config msm8996_a1noc_regmap_config = {
.reg_bits = 32,
.reg_stride = 4,
.val_bits = 32,
- .max_register = 0x7000,
+ .max_register = 0x5000,
.fast_io = true
};
@@ -1851,7 +1858,7 @@ static const struct regmap_config msm8996_a2noc_regmap_config = {
.reg_bits = 32,
.reg_stride = 4,
.val_bits = 32,
- .max_register = 0xa000,
+ .max_register = 0x7000,
.fast_io = true
};
@@ -1859,6 +1866,8 @@ static const struct qcom_icc_desc msm8996_a2noc = {
.type = QCOM_ICC_NOC,
.nodes = a2noc_nodes,
.num_nodes = ARRAY_SIZE(a2noc_nodes),
+ .clocks = bus_a2noc_clocks,
+ .num_clocks = ARRAY_SIZE(bus_a2noc_clocks),
.regmap_cfg = &msm8996_a2noc_regmap_config
};
@@ -1877,7 +1886,7 @@ static const struct regmap_config msm8996_bimc_regmap_config = {
.reg_bits = 32,
.reg_stride = 4,
.val_bits = 32,
- .max_register = 0x62000,
+ .max_register = 0x5a000,
.fast_io = true
};
@@ -1988,7 +1997,7 @@ static const struct regmap_config msm8996_mnoc_regmap_config = {
.reg_bits = 32,
.reg_stride = 4,
.val_bits = 32,
- .max_register = 0x20000,
+ .max_register = 0x1c000,
.fast_io = true
};
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index ab160198edd6..f2425b0f0cd6 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -3858,7 +3858,9 @@ static int arm_smmu_device_remove(struct platform_device *pdev)
static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
- arm_smmu_device_remove(pdev);
+ struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
+
+ arm_smmu_device_disable(smmu);
}
static const struct of_device_id arm_smmu_of_match[] = {
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index 719fbca1fe52..2ff7a72cf377 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -1316,8 +1316,14 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
switch (cap) {
case IOMMU_CAP_CACHE_COHERENCY:
- /* Assume that a coherent TCU implies coherent TBUs */
- return cfg->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
+ /*
+ * It's overwhelmingly the case in practice that when the pagetable
+ * walk interface is connected to a coherent interconnect, all the
+ * translation interfaces are too. Furthermore if the device is
+ * natively coherent, then its translation interface must also be.
+ */
+ return cfg->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK ||
+ device_get_dma_attr(dev) == DEV_DMA_COHERENT;
case IOMMU_CAP_NOEXEC:
return true;
default:
@@ -2185,19 +2191,16 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
return 0;
}
-static int arm_smmu_device_remove(struct platform_device *pdev)
+static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
if (!smmu)
- return -ENODEV;
+ return;
if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
dev_notice(&pdev->dev, "disabling translation\n");
- iommu_device_unregister(&smmu->iommu);
- iommu_device_sysfs_remove(&smmu->iommu);
-
arm_smmu_rpm_get(smmu);
/* Turn the thing off */
arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, ARM_SMMU_sCR0_CLIENTPD);
@@ -2209,12 +2212,21 @@ static int arm_smmu_device_remove(struct platform_device *pdev)
clk_bulk_disable(smmu->num_clks, smmu->clks);
clk_bulk_unprepare(smmu->num_clks, smmu->clks);
- return 0;
}
-static void arm_smmu_device_shutdown(struct platform_device *pdev)
+static int arm_smmu_device_remove(struct platform_device *pdev)
{
- arm_smmu_device_remove(pdev);
+ struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
+
+ if (!smmu)
+ return -ENODEV;
+
+ iommu_device_unregister(&smmu->iommu);
+ iommu_device_sysfs_remove(&smmu->iommu);
+
+ arm_smmu_device_shutdown(pdev);
+
+ return 0;
}
static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index de91dd88705b..5f6a85aea501 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -3185,14 +3185,16 @@ EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner);
*/
int iommu_device_claim_dma_owner(struct device *dev, void *owner)
{
- struct iommu_group *group = iommu_group_get(dev);
+ struct iommu_group *group;
int ret = 0;
- if (!group)
- return -ENODEV;
if (WARN_ON(!owner))
return -EINVAL;
+ group = iommu_group_get(dev);
+ if (!group)
+ return -ENODEV;
+
mutex_lock(&group->mutex);
if (group->owner_cnt) {
if (group->owner != owner) {
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index a44ad92fc5eb..fe452ce46642 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -197,7 +197,7 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
curr = __get_cached_rbnode(iovad, limit_pfn);
curr_iova = to_iova(curr);
- retry_pfn = curr_iova->pfn_hi + 1;
+ retry_pfn = curr_iova->pfn_hi;
retry:
do {
@@ -211,7 +211,7 @@ retry:
if (high_pfn < size || new_pfn < low_pfn) {
if (low_pfn == iovad->start_pfn && retry_pfn < limit_pfn) {
high_pfn = limit_pfn;
- low_pfn = retry_pfn;
+ low_pfn = retry_pfn + 1;
curr = iova_find_limit(iovad, limit_pfn);
curr_iova = to_iova(curr);
goto retry;
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index 69682ee068d2..ca581ff1c769 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -683,7 +683,7 @@ static int mtk_iommu_v1_probe(struct platform_device *pdev)
ret = iommu_device_sysfs_add(&data->iommu, &pdev->dev, NULL,
dev_name(&pdev->dev));
if (ret)
- return ret;
+ goto out_clk_unprepare;
ret = iommu_device_register(&data->iommu, &mtk_iommu_v1_ops, dev);
if (ret)
@@ -698,6 +698,8 @@ out_dev_unreg:
iommu_device_unregister(&data->iommu);
out_sysfs_remove:
iommu_device_sysfs_remove(&data->iommu);
+out_clk_unprepare:
+ clk_disable_unprepare(data->bclk);
return ret;
}
diff --git a/drivers/md/bcache/bcache_ondisk.h b/drivers/md/bcache/bcache_ondisk.h
index 97413586195b..f96034e0ba4f 100644
--- a/drivers/md/bcache/bcache_ondisk.h
+++ b/drivers/md/bcache/bcache_ondisk.h
@@ -106,7 +106,8 @@ static inline unsigned long bkey_bytes(const struct bkey *k)
return bkey_u64s(k) * sizeof(__u64);
}
-#define bkey_copy(_dest, _src) memcpy(_dest, _src, bkey_bytes(_src))
+#define bkey_copy(_dest, _src) unsafe_memcpy(_dest, _src, bkey_bytes(_src), \
+ /* bkey is always padded */)
static inline void bkey_copy_key(struct bkey *dest, const struct bkey *src)
{
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index e5da469a4235..c182c21de2e8 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -149,7 +149,8 @@ add:
bytes, GFP_KERNEL);
if (!i)
return -ENOMEM;
- memcpy(&i->j, j, bytes);
+ unsafe_memcpy(&i->j, j, bytes,
+ /* "bytes" was calculated by set_bytes() above */);
/* Add to the location after 'where' points to */
list_add(&i->list, where);
ret = 1;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index e1ea3a7bd9d9..b424a6ee27ba 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1742,6 +1742,8 @@ static void dm_split_and_process_bio(struct mapped_device *md,
* otherwise associated queue_limits won't be imposed.
*/
bio = bio_split_to_limits(bio);
+ if (!bio)
+ return;
}
init_clone_info(&ci, md, map, bio, is_abnormal);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 775f1dde190a..02b0240e7c71 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -455,6 +455,8 @@ static void md_submit_bio(struct bio *bio)
}
bio = bio_split_to_limits(bio);
+ if (!bio)
+ return;
if (mddev->ro == MD_RDONLY && unlikely(rw == WRITE)) {
if (bio_sectors(bio) != 0)
@@ -3642,7 +3644,7 @@ EXPORT_SYMBOL_GPL(md_rdev_init);
*/
static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
{
- static struct md_rdev *claim_rdev; /* just for claiming the bdev */
+ static struct md_rdev claim_rdev; /* just for claiming the bdev */
struct md_rdev *rdev;
sector_t size;
int err;
@@ -3660,7 +3662,7 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe
rdev->bdev = blkdev_get_by_dev(newdev,
FMODE_READ | FMODE_WRITE | FMODE_EXCL,
- super_format == -2 ? claim_rdev : rdev);
+ super_format == -2 ? &claim_rdev : rdev);
if (IS_ERR(rdev->bdev)) {
pr_warn("md: could not open device unknown-block(%u,%u).\n",
MAJOR(newdev), MINOR(newdev));
diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c
index fc3758a5bc1c..53e495223ea0 100644
--- a/drivers/media/common/videobuf2/videobuf2-core.c
+++ b/drivers/media/common/videobuf2/videobuf2-core.c
@@ -2149,8 +2149,6 @@ int vb2_core_streamon(struct vb2_queue *q, unsigned int type)
if (ret)
return ret;
- q->streaming = 1;
-
/*
* Tell driver to start streaming provided sufficient buffers
* are available.
@@ -2161,12 +2159,13 @@ int vb2_core_streamon(struct vb2_queue *q, unsigned int type)
goto unprepare;
}
+ q->streaming = 1;
+
dprintk(q, 3, "successful\n");
return 0;
unprepare:
call_void_qop(q, unprepare_streaming, q);
- q->streaming = 0;
return ret;
}
EXPORT_SYMBOL_GPL(vb2_core_streamon);
diff --git a/drivers/media/v4l2-core/v4l2-ctrls-api.c b/drivers/media/v4l2-core/v4l2-ctrls-api.c
index 3d3b6dc24ca6..002ea6588edf 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls-api.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls-api.c
@@ -150,8 +150,8 @@ static int user_to_new(struct v4l2_ext_control *c, struct v4l2_ctrl *ctrl)
* then return an error.
*/
if (strlen(ctrl->p_new.p_char) == ctrl->maximum && last)
- ctrl->is_new = 1;
return -ERANGE;
+ ctrl->is_new = 1;
}
return ret;
default:
diff --git a/drivers/memory/atmel-sdramc.c b/drivers/memory/atmel-sdramc.c
index 9c49d00c2a96..ea6e9e1eaf04 100644
--- a/drivers/memory/atmel-sdramc.c
+++ b/drivers/memory/atmel-sdramc.c
@@ -47,19 +47,17 @@ static int atmel_ramc_probe(struct platform_device *pdev)
caps = of_device_get_match_data(&pdev->dev);
if (caps->has_ddrck) {
- clk = devm_clk_get(&pdev->dev, "ddrck");
+ clk = devm_clk_get_enabled(&pdev->dev, "ddrck");
if (IS_ERR(clk))
return PTR_ERR(clk);
- clk_prepare_enable(clk);
}
if (caps->has_mpddr_clk) {
- clk = devm_clk_get(&pdev->dev, "mpddr");
+ clk = devm_clk_get_enabled(&pdev->dev, "mpddr");
if (IS_ERR(clk)) {
pr_err("AT91 RAMC: couldn't get mpddr clock\n");
return PTR_ERR(clk);
}
- clk_prepare_enable(clk);
}
return 0;
diff --git a/drivers/memory/mvebu-devbus.c b/drivers/memory/mvebu-devbus.c
index 8450638e8670..efc6c08db2b7 100644
--- a/drivers/memory/mvebu-devbus.c
+++ b/drivers/memory/mvebu-devbus.c
@@ -280,10 +280,9 @@ static int mvebu_devbus_probe(struct platform_device *pdev)
if (IS_ERR(devbus->base))
return PTR_ERR(devbus->base);
- clk = devm_clk_get(&pdev->dev, NULL);
+ clk = devm_clk_get_enabled(&pdev->dev, NULL);
if (IS_ERR(clk))
return PTR_ERR(clk);
- clk_prepare_enable(clk);
/*
* Obtain clock period in picoseconds,
diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c
index 57d9f91fe89b..d78f73db37c8 100644
--- a/drivers/memory/omap-gpmc.c
+++ b/drivers/memory/omap-gpmc.c
@@ -1918,7 +1918,8 @@ int gpmc_cs_program_settings(int cs, struct gpmc_settings *p)
}
}
- if (p->wait_pin > gpmc_nr_waitpins) {
+ if (p->wait_pin != GPMC_WAITPIN_INVALID &&
+ p->wait_pin > gpmc_nr_waitpins) {
pr_err("%s: invalid wait-pin (%d)\n", __func__, p->wait_pin);
return -EINVAL;
}
diff --git a/drivers/memory/tegra/tegra186.c b/drivers/memory/tegra/tegra186.c
index 62477e592bf5..7bb73f06fad3 100644
--- a/drivers/memory/tegra/tegra186.c
+++ b/drivers/memory/tegra/tegra186.c
@@ -22,32 +22,6 @@
#define MC_SID_STREAMID_SECURITY_WRITE_ACCESS_DISABLED BIT(16)
#define MC_SID_STREAMID_SECURITY_OVERRIDE BIT(8)
-static void tegra186_mc_program_sid(struct tegra_mc *mc)
-{
- unsigned int i;
-
- for (i = 0; i < mc->soc->num_clients; i++) {
- const struct tegra_mc_client *client = &mc->soc->clients[i];
- u32 override, security;
-
- override = readl(mc->regs + client->regs.sid.override);
- security = readl(mc->regs + client->regs.sid.security);
-
- dev_dbg(mc->dev, "client %s: override: %x security: %x\n",
- client->name, override, security);
-
- dev_dbg(mc->dev, "setting SID %u for %s\n", client->sid,
- client->name);
- writel(client->sid, mc->regs + client->regs.sid.override);
-
- override = readl(mc->regs + client->regs.sid.override);
- security = readl(mc->regs + client->regs.sid.security);
-
- dev_dbg(mc->dev, "client %s: override: %x security: %x\n",
- client->name, override, security);
- }
-}
-
static int tegra186_mc_probe(struct tegra_mc *mc)
{
struct platform_device *pdev = to_platform_device(mc->dev);
@@ -85,8 +59,6 @@ populate:
if (err < 0)
return err;
- tegra186_mc_program_sid(mc);
-
return 0;
}
@@ -95,13 +67,6 @@ static void tegra186_mc_remove(struct tegra_mc *mc)
of_platform_depopulate(mc->dev);
}
-static int tegra186_mc_resume(struct tegra_mc *mc)
-{
- tegra186_mc_program_sid(mc);
-
- return 0;
-}
-
#if IS_ENABLED(CONFIG_IOMMU_API)
static void tegra186_mc_client_sid_override(struct tegra_mc *mc,
const struct tegra_mc_client *client,
@@ -173,7 +138,6 @@ static int tegra186_mc_probe_device(struct tegra_mc *mc, struct device *dev)
const struct tegra_mc_ops tegra186_mc_ops = {
.probe = tegra186_mc_probe,
.remove = tegra186_mc_remove,
- .resume = tegra186_mc_resume,
.probe_device = tegra186_mc_probe_device,
.handle_irq = tegra30_mc_handle_irq,
};
diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
index c9902a1dcf5d..5310606113fe 100644
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -321,7 +321,7 @@ static void fastrpc_free_map(struct kref *ref)
perm.vmid = QCOM_SCM_VMID_HLOS;
perm.perm = QCOM_SCM_PERM_RWX;
err = qcom_scm_assign_mem(map->phys, map->size,
- &(map->fl->cctx->vmperms[0].vmid), &perm, 1);
+ &map->fl->cctx->perms, &perm, 1);
if (err) {
dev_err(map->fl->sctx->dev, "Failed to assign memory phys 0x%llx size 0x%llx err %d",
map->phys, map->size, err);
@@ -334,6 +334,13 @@ static void fastrpc_free_map(struct kref *ref)
dma_buf_put(map->buf);
}
+ if (map->fl) {
+ spin_lock(&map->fl->lock);
+ list_del(&map->node);
+ spin_unlock(&map->fl->lock);
+ map->fl = NULL;
+ }
+
kfree(map);
}
@@ -343,38 +350,41 @@ static void fastrpc_map_put(struct fastrpc_map *map)
kref_put(&map->refcount, fastrpc_free_map);
}
-static void fastrpc_map_get(struct fastrpc_map *map)
+static int fastrpc_map_get(struct fastrpc_map *map)
{
- if (map)
- kref_get(&map->refcount);
+ if (!map)
+ return -ENOENT;
+
+ return kref_get_unless_zero(&map->refcount) ? 0 : -ENOENT;
}
static int fastrpc_map_lookup(struct fastrpc_user *fl, int fd,
- struct fastrpc_map **ppmap)
+ struct fastrpc_map **ppmap, bool take_ref)
{
+ struct fastrpc_session_ctx *sess = fl->sctx;
struct fastrpc_map *map = NULL;
+ int ret = -ENOENT;
- mutex_lock(&fl->mutex);
+ spin_lock(&fl->lock);
list_for_each_entry(map, &fl->maps, node) {
- if (map->fd == fd) {
- *ppmap = map;
- mutex_unlock(&fl->mutex);
- return 0;
- }
- }
- mutex_unlock(&fl->mutex);
-
- return -ENOENT;
-}
+ if (map->fd != fd)
+ continue;
-static int fastrpc_map_find(struct fastrpc_user *fl, int fd,
- struct fastrpc_map **ppmap)
-{
- int ret = fastrpc_map_lookup(fl, fd, ppmap);
+ if (take_ref) {
+ ret = fastrpc_map_get(map);
+ if (ret) {
+ dev_dbg(sess->dev, "%s: Failed to get map fd=%d ret=%d\n",
+ __func__, fd, ret);
+ break;
+ }
+ }
- if (!ret)
- fastrpc_map_get(*ppmap);
+ *ppmap = map;
+ ret = 0;
+ break;
+ }
+ spin_unlock(&fl->lock);
return ret;
}
@@ -746,7 +756,7 @@ static int fastrpc_map_create(struct fastrpc_user *fl, int fd,
struct fastrpc_map *map = NULL;
int err = 0;
- if (!fastrpc_map_find(fl, fd, ppmap))
+ if (!fastrpc_map_lookup(fl, fd, ppmap, true))
return 0;
map = kzalloc(sizeof(*map), GFP_KERNEL);
@@ -788,10 +798,8 @@ static int fastrpc_map_create(struct fastrpc_user *fl, int fd,
* If subsystem VMIDs are defined in DTSI, then do
* hyp_assign from HLOS to those VM(s)
*/
- unsigned int perms = BIT(QCOM_SCM_VMID_HLOS);
-
map->attr = attr;
- err = qcom_scm_assign_mem(map->phys, (u64)map->size, &perms,
+ err = qcom_scm_assign_mem(map->phys, (u64)map->size, &fl->cctx->perms,
fl->cctx->vmperms, fl->cctx->vmcount);
if (err) {
dev_err(sess->dev, "Failed to assign memory with phys 0x%llx size 0x%llx err %d",
@@ -1070,7 +1078,7 @@ static int fastrpc_put_args(struct fastrpc_invoke_ctx *ctx,
for (i = 0; i < FASTRPC_MAX_FDLIST; i++) {
if (!fdlist[i])
break;
- if (!fastrpc_map_lookup(fl, (int)fdlist[i], &mmap))
+ if (!fastrpc_map_lookup(fl, (int)fdlist[i], &mmap, false))
fastrpc_map_put(mmap);
}
@@ -1258,10 +1266,9 @@ static int fastrpc_init_create_static_process(struct fastrpc_user *fl,
/* Map if we have any heap VMIDs associated with this ADSP Static Process. */
if (fl->cctx->vmcount) {
- unsigned int perms = BIT(QCOM_SCM_VMID_HLOS);
-
err = qcom_scm_assign_mem(fl->cctx->remote_heap->phys,
- (u64)fl->cctx->remote_heap->size, &perms,
+ (u64)fl->cctx->remote_heap->size,
+ &fl->cctx->perms,
fl->cctx->vmperms, fl->cctx->vmcount);
if (err) {
dev_err(fl->sctx->dev, "Failed to assign memory with phys 0x%llx size 0x%llx err %d",
@@ -1309,7 +1316,7 @@ err_invoke:
perm.perm = QCOM_SCM_PERM_RWX;
err = qcom_scm_assign_mem(fl->cctx->remote_heap->phys,
(u64)fl->cctx->remote_heap->size,
- &(fl->cctx->vmperms[0].vmid), &perm, 1);
+ &fl->cctx->perms, &perm, 1);
if (err)
dev_err(fl->sctx->dev, "Failed to assign memory phys 0x%llx size 0x%llx err %d",
fl->cctx->remote_heap->phys, fl->cctx->remote_heap->size, err);
@@ -1433,12 +1440,7 @@ err_invoke:
fl->init_mem = NULL;
fastrpc_buf_free(imem);
err_alloc:
- if (map) {
- spin_lock(&fl->lock);
- list_del(&map->node);
- spin_unlock(&fl->lock);
- fastrpc_map_put(map);
- }
+ fastrpc_map_put(map);
err:
kfree(args);
@@ -1514,10 +1516,8 @@ static int fastrpc_device_release(struct inode *inode, struct file *file)
fastrpc_context_put(ctx);
}
- list_for_each_entry_safe(map, m, &fl->maps, node) {
- list_del(&map->node);
+ list_for_each_entry_safe(map, m, &fl->maps, node)
fastrpc_map_put(map);
- }
list_for_each_entry_safe(buf, b, &fl->mmaps, node) {
list_del(&buf->node);
@@ -1894,12 +1894,11 @@ static int fastrpc_req_mmap(struct fastrpc_user *fl, char __user *argp)
/* Add memory to static PD pool, protection thru hypervisor */
if (req.flags != ADSP_MMAP_REMOTE_HEAP_ADDR && fl->cctx->vmcount) {
struct qcom_scm_vmperm perm;
- int err = 0;
perm.vmid = QCOM_SCM_VMID_HLOS;
perm.perm = QCOM_SCM_PERM_RWX;
err = qcom_scm_assign_mem(buf->phys, buf->size,
- &(fl->cctx->vmperms[0].vmid), &perm, 1);
+ &fl->cctx->perms, &perm, 1);
if (err) {
dev_err(fl->sctx->dev, "Failed to assign memory phys 0x%llx size 0x%llx err %d",
buf->phys, buf->size, err);
diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c
index 4a08b624910a..a81b890c7ee6 100644
--- a/drivers/misc/mei/bus.c
+++ b/drivers/misc/mei/bus.c
@@ -702,13 +702,15 @@ void *mei_cldev_dma_map(struct mei_cl_device *cldev, u8 buffer_id, size_t size)
if (cl->state == MEI_FILE_UNINITIALIZED) {
ret = mei_cl_link(cl);
if (ret)
- goto out;
+ goto notlinked;
/* update pointers */
cl->cldev = cldev;
}
ret = mei_cl_dma_alloc_and_map(cl, NULL, buffer_id, size);
-out:
+ if (ret)
+ mei_cl_unlink(cl);
+notlinked:
mutex_unlock(&bus->device_lock);
if (ret)
return ERR_PTR(ret);
@@ -758,7 +760,7 @@ int mei_cldev_enable(struct mei_cl_device *cldev)
if (cl->state == MEI_FILE_UNINITIALIZED) {
ret = mei_cl_link(cl);
if (ret)
- goto out;
+ goto notlinked;
/* update pointers */
cl->cldev = cldev;
}
@@ -785,6 +787,9 @@ int mei_cldev_enable(struct mei_cl_device *cldev)
}
out:
+ if (ret)
+ mei_cl_unlink(cl);
+notlinked:
mutex_unlock(&bus->device_lock);
return ret;
@@ -1277,7 +1282,6 @@ static void mei_cl_bus_dev_release(struct device *dev)
mei_cl_flush_queues(cldev->cl, NULL);
mei_me_cl_put(cldev->me_cl);
mei_dev_bus_put(cldev->bus);
- mei_cl_unlink(cldev->cl);
kfree(cldev->cl);
kfree(cldev);
}
diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h
index 99966cd3e7d8..bdc65d50b945 100644
--- a/drivers/misc/mei/hw-me-regs.h
+++ b/drivers/misc/mei/hw-me-regs.h
@@ -111,6 +111,8 @@
#define MEI_DEV_ID_RPL_S 0x7A68 /* Raptor Lake Point S */
+#define MEI_DEV_ID_MTL_M 0x7E70 /* Meteor Lake Point M */
+
/*
* MEI HW Section
*/
diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c
index 704cd0caa172..5bf0d50d55a0 100644
--- a/drivers/misc/mei/pci-me.c
+++ b/drivers/misc/mei/pci-me.c
@@ -118,6 +118,8 @@ static const struct pci_device_id mei_me_pci_tbl[] = {
{MEI_PCI_DEVICE(MEI_DEV_ID_RPL_S, MEI_ME_PCH15_CFG)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_MTL_M, MEI_ME_PCH15_CFG)},
+
/* required last entry */
{0, }
};
diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c
index aa7b05de97dd..4f8d962bb5b2 100644
--- a/drivers/misc/vmw_vmci/vmci_guest.c
+++ b/drivers/misc/vmw_vmci/vmci_guest.c
@@ -56,8 +56,6 @@ struct vmci_guest_device {
bool exclusive_vectors;
- struct tasklet_struct datagram_tasklet;
- struct tasklet_struct bm_tasklet;
struct wait_queue_head inout_wq;
void *data_buffer;
@@ -304,9 +302,8 @@ static int vmci_check_host_caps(struct pci_dev *pdev)
* This function assumes that it has exclusive access to the data
* in register(s) for the duration of the call.
*/
-static void vmci_dispatch_dgs(unsigned long data)
+static void vmci_dispatch_dgs(struct vmci_guest_device *vmci_dev)
{
- struct vmci_guest_device *vmci_dev = (struct vmci_guest_device *)data;
u8 *dg_in_buffer = vmci_dev->data_buffer;
struct vmci_datagram *dg;
size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE;
@@ -465,10 +462,8 @@ static void vmci_dispatch_dgs(unsigned long data)
* Scans the notification bitmap for raised flags, clears them
* and handles the notifications.
*/
-static void vmci_process_bitmap(unsigned long data)
+static void vmci_process_bitmap(struct vmci_guest_device *dev)
{
- struct vmci_guest_device *dev = (struct vmci_guest_device *)data;
-
if (!dev->notification_bitmap) {
dev_dbg(dev->dev, "No bitmap present in %s\n", __func__);
return;
@@ -486,13 +481,13 @@ static irqreturn_t vmci_interrupt(int irq, void *_dev)
struct vmci_guest_device *dev = _dev;
/*
- * If we are using MSI-X with exclusive vectors then we simply schedule
- * the datagram tasklet, since we know the interrupt was meant for us.
+ * If we are using MSI-X with exclusive vectors then we simply call
+ * vmci_dispatch_dgs(), since we know the interrupt was meant for us.
* Otherwise we must read the ICR to determine what to do.
*/
if (dev->exclusive_vectors) {
- tasklet_schedule(&dev->datagram_tasklet);
+ vmci_dispatch_dgs(dev);
} else {
unsigned int icr;
@@ -502,12 +497,12 @@ static irqreturn_t vmci_interrupt(int irq, void *_dev)
return IRQ_NONE;
if (icr & VMCI_ICR_DATAGRAM) {
- tasklet_schedule(&dev->datagram_tasklet);
+ vmci_dispatch_dgs(dev);
icr &= ~VMCI_ICR_DATAGRAM;
}
if (icr & VMCI_ICR_NOTIFICATION) {
- tasklet_schedule(&dev->bm_tasklet);
+ vmci_process_bitmap(dev);
icr &= ~VMCI_ICR_NOTIFICATION;
}
@@ -536,7 +531,7 @@ static irqreturn_t vmci_interrupt_bm(int irq, void *_dev)
struct vmci_guest_device *dev = _dev;
/* For MSI-X we can just assume it was meant for us. */
- tasklet_schedule(&dev->bm_tasklet);
+ vmci_process_bitmap(dev);
return IRQ_HANDLED;
}
@@ -638,10 +633,6 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
vmci_dev->iobase = iobase;
vmci_dev->mmio_base = mmio_base;
- tasklet_init(&vmci_dev->datagram_tasklet,
- vmci_dispatch_dgs, (unsigned long)vmci_dev);
- tasklet_init(&vmci_dev->bm_tasklet,
- vmci_process_bitmap, (unsigned long)vmci_dev);
init_waitqueue_head(&vmci_dev->inout_wq);
if (mmio_base != NULL) {
@@ -808,8 +799,9 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
* Request IRQ for legacy or MSI interrupts, or for first
* MSI-X vector.
*/
- error = request_irq(pci_irq_vector(pdev, 0), vmci_interrupt,
- IRQF_SHARED, KBUILD_MODNAME, vmci_dev);
+ error = request_threaded_irq(pci_irq_vector(pdev, 0), NULL,
+ vmci_interrupt, IRQF_SHARED,
+ KBUILD_MODNAME, vmci_dev);
if (error) {
dev_err(&pdev->dev, "Irq %u in use: %d\n",
pci_irq_vector(pdev, 0), error);
@@ -823,9 +815,9 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
* between the vectors.
*/
if (vmci_dev->exclusive_vectors) {
- error = request_irq(pci_irq_vector(pdev, 1),
- vmci_interrupt_bm, 0, KBUILD_MODNAME,
- vmci_dev);
+ error = request_threaded_irq(pci_irq_vector(pdev, 1), NULL,
+ vmci_interrupt_bm, 0,
+ KBUILD_MODNAME, vmci_dev);
if (error) {
dev_err(&pdev->dev,
"Failed to allocate irq %u: %d\n",
@@ -833,9 +825,11 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
goto err_free_irq;
}
if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
- error = request_irq(pci_irq_vector(pdev, 2),
- vmci_interrupt_dma_datagram,
- 0, KBUILD_MODNAME, vmci_dev);
+ error = request_threaded_irq(pci_irq_vector(pdev, 2),
+ NULL,
+ vmci_interrupt_dma_datagram,
+ 0, KBUILD_MODNAME,
+ vmci_dev);
if (error) {
dev_err(&pdev->dev,
"Failed to allocate irq %u: %d\n",
@@ -871,8 +865,6 @@ err_free_bm_irq:
err_free_irq:
free_irq(pci_irq_vector(pdev, 0), vmci_dev);
- tasklet_kill(&vmci_dev->datagram_tasklet);
- tasklet_kill(&vmci_dev->bm_tasklet);
err_disable_msi:
pci_free_irq_vectors(pdev);
@@ -943,9 +935,6 @@ static void vmci_guest_remove_device(struct pci_dev *pdev)
free_irq(pci_irq_vector(pdev, 0), vmci_dev);
pci_free_irq_vectors(pdev);
- tasklet_kill(&vmci_dev->datagram_tasklet);
- tasklet_kill(&vmci_dev->bm_tasklet);
-
if (vmci_dev->notification_bitmap) {
/*
* The device reset above cleared the bitmap state of the
diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c
index babf21a0adeb..f191a2a76f3b 100644
--- a/drivers/mmc/core/sdio_bus.c
+++ b/drivers/mmc/core/sdio_bus.c
@@ -294,6 +294,12 @@ static void sdio_release_func(struct device *dev)
if (!(func->card->quirks & MMC_QUIRK_NONSTD_SDIO))
sdio_free_func_cis(func);
+ /*
+ * We have now removed the link to the tuples in the
+ * card structure, so remove the reference.
+ */
+ put_device(&func->card->dev);
+
kfree(func->info);
kfree(func->tmpbuf);
kfree(func);
@@ -324,6 +330,12 @@ struct sdio_func *sdio_alloc_func(struct mmc_card *card)
device_initialize(&func->dev);
+ /*
+ * We may link to tuples in the card structure,
+ * we need make sure we have a reference to it.
+ */
+ get_device(&func->card->dev);
+
func->dev.parent = &card->dev;
func->dev.bus = &sdio_bus_type;
func->dev.release = sdio_release_func;
@@ -377,10 +389,9 @@ int sdio_add_func(struct sdio_func *func)
*/
void sdio_remove_func(struct sdio_func *func)
{
- if (!sdio_func_present(func))
- return;
+ if (sdio_func_present(func))
+ device_del(&func->dev);
- device_del(&func->dev);
of_node_put(func->dev.of_node);
put_device(&func->dev);
}
diff --git a/drivers/mmc/core/sdio_cis.c b/drivers/mmc/core/sdio_cis.c
index a705ba6eff5b..afaa6cab1adc 100644
--- a/drivers/mmc/core/sdio_cis.c
+++ b/drivers/mmc/core/sdio_cis.c
@@ -404,12 +404,6 @@ int sdio_read_func_cis(struct sdio_func *func)
return ret;
/*
- * Since we've linked to tuples in the card structure,
- * we must make sure we have a reference to it.
- */
- get_device(&func->card->dev);
-
- /*
* Vendor/device id is optional for function CIS, so
* copy it from the card structure as needed.
*/
@@ -434,11 +428,5 @@ void sdio_free_func_cis(struct sdio_func *func)
}
func->tuples = NULL;
-
- /*
- * We have now removed the link to the tuples in the
- * card structure, so remove the reference.
- */
- put_device(&func->card->dev);
}
diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c
index dc2db9c185ea..eda1e2ddcaca 100644
--- a/drivers/mmc/host/jz4740_mmc.c
+++ b/drivers/mmc/host/jz4740_mmc.c
@@ -1053,6 +1053,16 @@ static int jz4740_mmc_probe(struct platform_device* pdev)
mmc->ops = &jz4740_mmc_ops;
if (!mmc->f_max)
mmc->f_max = JZ_MMC_CLK_RATE;
+
+ /*
+ * There seems to be a problem with this driver on the JZ4760 and
+ * JZ4760B SoCs. There, when using the maximum rate supported (50 MHz),
+ * the communication fails with many SD cards.
+ * Until this bug is sorted out, limit the maximum rate to 24 MHz.
+ */
+ if (host->version == JZ_MMC_JZ4760 && mmc->f_max > JZ_MMC_CLK_RATE)
+ mmc->f_max = JZ_MMC_CLK_RATE;
+
mmc->f_min = mmc->f_max / 128;
mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c
index 6e5ea0213b47..5c94ad4661ce 100644
--- a/drivers/mmc/host/meson-gx-mmc.c
+++ b/drivers/mmc/host/meson-gx-mmc.c
@@ -435,7 +435,8 @@ static int meson_mmc_clk_init(struct meson_host *host)
clk_reg |= FIELD_PREP(CLK_CORE_PHASE_MASK, CLK_PHASE_180);
clk_reg |= FIELD_PREP(CLK_TX_PHASE_MASK, CLK_PHASE_0);
clk_reg |= FIELD_PREP(CLK_RX_PHASE_MASK, CLK_PHASE_0);
- clk_reg |= CLK_IRQ_SDIO_SLEEP(host);
+ if (host->mmc->caps & MMC_CAP_SDIO_IRQ)
+ clk_reg |= CLK_IRQ_SDIO_SLEEP(host);
writel(clk_reg, host->regs + SD_EMMC_CLOCK);
/* get the mux parents */
@@ -948,16 +949,18 @@ static irqreturn_t meson_mmc_irq(int irq, void *dev_id)
{
struct meson_host *host = dev_id;
struct mmc_command *cmd;
- u32 status, raw_status;
+ u32 status, raw_status, irq_mask = IRQ_EN_MASK;
irqreturn_t ret = IRQ_NONE;
+ if (host->mmc->caps & MMC_CAP_SDIO_IRQ)
+ irq_mask |= IRQ_SDIO;
raw_status = readl(host->regs + SD_EMMC_STATUS);
- status = raw_status & (IRQ_EN_MASK | IRQ_SDIO);
+ status = raw_status & irq_mask;
if (!status) {
dev_dbg(host->dev,
- "Unexpected IRQ! irq_en 0x%08lx - status 0x%08x\n",
- IRQ_EN_MASK | IRQ_SDIO, raw_status);
+ "Unexpected IRQ! irq_en 0x%08x - status 0x%08x\n",
+ irq_mask, raw_status);
return IRQ_NONE;
}
@@ -1204,6 +1207,11 @@ static int meson_mmc_probe(struct platform_device *pdev)
goto free_host;
}
+ mmc->caps |= MMC_CAP_CMD23;
+
+ if (mmc->caps & MMC_CAP_SDIO_IRQ)
+ mmc->caps2 |= MMC_CAP2_SDIO_IRQ_NOTHREAD;
+
host->data = (struct meson_mmc_data *)
of_device_get_match_data(&pdev->dev);
if (!host->data) {
@@ -1277,11 +1285,6 @@ static int meson_mmc_probe(struct platform_device *pdev)
spin_lock_init(&host->lock);
- mmc->caps |= MMC_CAP_CMD23;
-
- if (mmc->caps & MMC_CAP_SDIO_IRQ)
- mmc->caps2 |= MMC_CAP2_SDIO_IRQ_NOTHREAD;
-
if (host->dram_access_quirk) {
/* Limit segments to 1 due to low available sram memory */
mmc->max_segs = 1;
diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index 106dd204b1a7..cc333ad67cac 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -1437,7 +1437,7 @@ static int mmc_spi_probe(struct spi_device *spi)
status = mmc_add_host(mmc);
if (status != 0)
- goto fail_add_host;
+ goto fail_glue_init;
/*
* Index 0 is card detect
@@ -1445,7 +1445,7 @@ static int mmc_spi_probe(struct spi_device *spi)
*/
status = mmc_gpiod_request_cd(mmc, NULL, 0, false, 1000);
if (status == -EPROBE_DEFER)
- goto fail_add_host;
+ goto fail_gpiod_request;
if (!status) {
/*
* The platform has a CD GPIO signal that may support
@@ -1460,7 +1460,7 @@ static int mmc_spi_probe(struct spi_device *spi)
/* Index 1 is write protect/read only */
status = mmc_gpiod_request_ro(mmc, NULL, 1, 0);
if (status == -EPROBE_DEFER)
- goto fail_add_host;
+ goto fail_gpiod_request;
if (!status)
has_ro = true;
@@ -1474,7 +1474,7 @@ static int mmc_spi_probe(struct spi_device *spi)
? ", cd polling" : "");
return 0;
-fail_add_host:
+fail_gpiod_request:
mmc_remove_host(mmc);
fail_glue_init:
mmc_spi_dma_free(host);
diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index 89ef0c80ac37..9e73c34b6401 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -107,6 +107,7 @@
#define ESDHC_TUNING_START_TAP_DEFAULT 0x1
#define ESDHC_TUNING_START_TAP_MASK 0x7f
#define ESDHC_TUNING_CMD_CRC_CHECK_DISABLE (1 << 7)
+#define ESDHC_TUNING_STEP_DEFAULT 0x1
#define ESDHC_TUNING_STEP_MASK 0x00070000
#define ESDHC_TUNING_STEP_SHIFT 16
@@ -1368,7 +1369,7 @@ static void sdhci_esdhc_imx_hwinit(struct sdhci_host *host)
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct pltfm_imx_data *imx_data = sdhci_pltfm_priv(pltfm_host);
struct cqhci_host *cq_host = host->mmc->cqe_private;
- int tmp;
+ u32 tmp;
if (esdhc_is_usdhc(imx_data)) {
/*
@@ -1423,17 +1424,24 @@ static void sdhci_esdhc_imx_hwinit(struct sdhci_host *host)
if (imx_data->socdata->flags & ESDHC_FLAG_STD_TUNING) {
tmp = readl(host->ioaddr + ESDHC_TUNING_CTRL);
- tmp |= ESDHC_STD_TUNING_EN |
- ESDHC_TUNING_START_TAP_DEFAULT;
- if (imx_data->boarddata.tuning_start_tap) {
- tmp &= ~ESDHC_TUNING_START_TAP_MASK;
+ tmp |= ESDHC_STD_TUNING_EN;
+
+ /*
+ * ROM code or bootloader may config the start tap
+ * and step, unmask them first.
+ */
+ tmp &= ~(ESDHC_TUNING_START_TAP_MASK | ESDHC_TUNING_STEP_MASK);
+ if (imx_data->boarddata.tuning_start_tap)
tmp |= imx_data->boarddata.tuning_start_tap;
- }
+ else
+ tmp |= ESDHC_TUNING_START_TAP_DEFAULT;
if (imx_data->boarddata.tuning_step) {
- tmp &= ~ESDHC_TUNING_STEP_MASK;
tmp |= imx_data->boarddata.tuning_step
<< ESDHC_TUNING_STEP_SHIFT;
+ } else {
+ tmp |= ESDHC_TUNING_STEP_DEFAULT
+ << ESDHC_TUNING_STEP_SHIFT;
}
/* Disable the CMD CRC check for tuning, if not, need to
diff --git a/drivers/mmc/host/sunxi-mmc.c b/drivers/mmc/host/sunxi-mmc.c
index b16e12e62e72..3db9f32d6a7b 100644
--- a/drivers/mmc/host/sunxi-mmc.c
+++ b/drivers/mmc/host/sunxi-mmc.c
@@ -1492,9 +1492,11 @@ static int sunxi_mmc_remove(struct platform_device *pdev)
struct sunxi_mmc_host *host = mmc_priv(mmc);
mmc_remove_host(mmc);
- pm_runtime_force_suspend(&pdev->dev);
- disable_irq(host->irq);
- sunxi_mmc_disable(host);
+ pm_runtime_disable(&pdev->dev);
+ if (!pm_runtime_status_suspended(&pdev->dev)) {
+ disable_irq(host->irq);
+ sunxi_mmc_disable(host);
+ }
dma_free_coherent(&pdev->dev, PAGE_SIZE, host->sg_cpu, host->sg_dma);
mmc_free_host(mmc);
diff --git a/drivers/mtd/parsers/scpart.c b/drivers/mtd/parsers/scpart.c
index 02601bb33de4..6e5e11c37078 100644
--- a/drivers/mtd/parsers/scpart.c
+++ b/drivers/mtd/parsers/scpart.c
@@ -50,7 +50,7 @@ static int scpart_scan_partmap(struct mtd_info *master, loff_t partmap_offs,
int cnt = 0;
int res = 0;
int res2;
- loff_t offs;
+ uint32_t offs;
size_t retlen;
struct sc_part_desc *pdesc = NULL;
struct sc_part_desc *tmpdesc;
diff --git a/drivers/mtd/parsers/tplink_safeloader.c b/drivers/mtd/parsers/tplink_safeloader.c
index f601e7bd8627..1c689dafca2a 100644
--- a/drivers/mtd/parsers/tplink_safeloader.c
+++ b/drivers/mtd/parsers/tplink_safeloader.c
@@ -91,7 +91,7 @@ static int mtd_parser_tplink_safeloader_parse(struct mtd_info *mtd,
buf = mtd_parser_tplink_safeloader_read_table(mtd);
if (!buf) {
err = -ENOENT;
- goto err_out;
+ goto err_free_parts;
}
for (idx = 0, offset = TPLINK_SAFELOADER_DATA_OFFSET;
@@ -118,6 +118,8 @@ static int mtd_parser_tplink_safeloader_parse(struct mtd_info *mtd,
err_free:
for (idx -= 1; idx >= 0; idx--)
kfree(parts[idx].name);
+err_free_parts:
+ kfree(parts);
err_out:
return err;
};
diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c
index d8703d7dfd0a..d67c926bca8b 100644
--- a/drivers/mtd/spi-nor/core.c
+++ b/drivers/mtd/spi-nor/core.c
@@ -10,6 +10,7 @@
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/module.h>
+#include <linux/delay.h>
#include <linux/device.h>
#include <linux/mutex.h>
#include <linux/math64.h>
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 455b555275f1..c99ffe6c683a 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -1549,6 +1549,7 @@ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr)
slave_err(bond->dev, port->slave->dev,
"Port %d did not find a suitable aggregator\n",
port->actor_port_number);
+ return;
}
}
/* if all aggregator's ports are READY_N == TRUE, set ready=TRUE
diff --git a/drivers/net/bonding/bond_debugfs.c b/drivers/net/bonding/bond_debugfs.c
index 4f9b4a18c74c..594094526648 100644
--- a/drivers/net/bonding/bond_debugfs.c
+++ b/drivers/net/bonding/bond_debugfs.c
@@ -76,7 +76,7 @@ void bond_debug_reregister(struct bonding *bond)
d = debugfs_rename(bonding_debug_root, bond->debug_dir,
bonding_debug_root, bond->dev->name);
- if (d) {
+ if (!IS_ERR(d)) {
bond->debug_dir = d;
} else {
netdev_warn(bond->dev, "failed to reregister, so just unregister old one\n");
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index b4c65783960a..0363ce597661 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2654,10 +2654,12 @@ static void bond_miimon_link_change(struct bonding *bond,
static void bond_miimon_commit(struct bonding *bond)
{
- struct slave *slave, *primary;
+ struct slave *slave, *primary, *active;
bool do_failover = false;
struct list_head *iter;
+ ASSERT_RTNL();
+
bond_for_each_slave(bond, slave, iter) {
switch (slave->link_new_state) {
case BOND_LINK_NOCHANGE:
@@ -2700,8 +2702,8 @@ static void bond_miimon_commit(struct bonding *bond)
bond_miimon_link_change(bond, slave, BOND_LINK_UP);
- if (!rcu_access_pointer(bond->curr_active_slave) || slave == primary ||
- slave->prio > rcu_dereference(bond->curr_active_slave)->prio)
+ active = rtnl_dereference(bond->curr_active_slave);
+ if (!active || slave == primary || slave->prio > active->prio)
do_failover = true;
continue;
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ethtool.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ethtool.c
index 3585f02575df..57eeb066a945 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ethtool.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ethtool.c
@@ -48,6 +48,7 @@ mcp251xfd_ring_set_ringparam(struct net_device *ndev,
priv->rx_obj_num = layout.cur_rx;
priv->rx_obj_num_coalesce_irq = layout.rx_coalesce;
priv->tx->obj_num = layout.cur_tx;
+ priv->tx_obj_num_coalesce_irq = layout.tx_coalesce;
return 0;
}
diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig
index c26755f662c1..f6f3b43dfb06 100644
--- a/drivers/net/dsa/Kconfig
+++ b/drivers/net/dsa/Kconfig
@@ -35,12 +35,13 @@ config NET_DSA_LANTIQ_GSWIP
the xrx200 / VR9 SoC.
config NET_DSA_MT7530
- tristate "MediaTek MT753x and MT7621 Ethernet switch support"
+ tristate "MediaTek MT7530 and MT7531 Ethernet switch support"
select NET_DSA_TAG_MTK
select MEDIATEK_GE_PHY
help
- This enables support for the MediaTek MT7530, MT7531, and MT7621
- Ethernet switch chips.
+ This enables support for the MediaTek MT7530 and MT7531 Ethernet
+ switch chips. Multi-chip module MT7530 in MT7621AT, MT7621DAT,
+ MT7621ST and MT7623AI SoCs is supported.
config NET_DSA_MV88E6060
tristate "Marvell 88E6060 ethernet switch chip support"
diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c
index 47b54ecf2c6f..6178a96e389f 100644
--- a/drivers/net/dsa/microchip/ksz9477.c
+++ b/drivers/net/dsa/microchip/ksz9477.c
@@ -540,10 +540,10 @@ int ksz9477_fdb_del(struct ksz_device *dev, int port,
ksz_read32(dev, REG_SW_ALU_VAL_D, &alu_table[3]);
/* clear forwarding port */
- alu_table[2] &= ~BIT(port);
+ alu_table[1] &= ~BIT(port);
/* if there is no port to forward, clear table */
- if ((alu_table[2] & ALU_V_PORT_MAP) == 0) {
+ if ((alu_table[1] & ALU_V_PORT_MAP) == 0) {
alu_table[0] = 0;
alu_table[1] = 0;
alu_table[2] = 0;
diff --git a/drivers/net/dsa/microchip/ksz9477_i2c.c b/drivers/net/dsa/microchip/ksz9477_i2c.c
index c1a633ca1e6d..e315f669ec06 100644
--- a/drivers/net/dsa/microchip/ksz9477_i2c.c
+++ b/drivers/net/dsa/microchip/ksz9477_i2c.c
@@ -104,7 +104,7 @@ static const struct of_device_id ksz9477_dt_ids[] = {
},
{
.compatible = "microchip,ksz8563",
- .data = &ksz_switch_chips[KSZ9893]
+ .data = &ksz_switch_chips[KSZ8563]
},
{
.compatible = "microchip,ksz9567",
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 908fa89444c9..338f238f2043 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -1309,14 +1309,26 @@ mt7530_port_set_vlan_aware(struct dsa_switch *ds, int port)
if (!priv->ports[port].pvid)
mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK,
MT7530_VLAN_ACC_TAGGED);
- }
- /* Set the port as a user port which is to be able to recognize VID
- * from incoming packets before fetching entry within the VLAN table.
- */
- mt7530_rmw(priv, MT7530_PVC_P(port), VLAN_ATTR_MASK | PVC_EG_TAG_MASK,
- VLAN_ATTR(MT7530_VLAN_USER) |
- PVC_EG_TAG(MT7530_VLAN_EG_DISABLED));
+ /* Set the port as a user port which is to be able to recognize
+ * VID from incoming packets before fetching entry within the
+ * VLAN table.
+ */
+ mt7530_rmw(priv, MT7530_PVC_P(port),
+ VLAN_ATTR_MASK | PVC_EG_TAG_MASK,
+ VLAN_ATTR(MT7530_VLAN_USER) |
+ PVC_EG_TAG(MT7530_VLAN_EG_DISABLED));
+ } else {
+ /* Also set CPU ports to the "user" VLAN port attribute, to
+ * allow VLAN classification, but keep the EG_TAG attribute as
+ * "consistent" (i.o.w. don't change its value) for packets
+ * received by the switch from the CPU, so that tagged packets
+ * are forwarded to user ports as tagged, and untagged as
+ * untagged.
+ */
+ mt7530_rmw(priv, MT7530_PVC_P(port), VLAN_ATTR_MASK,
+ VLAN_ATTR(MT7530_VLAN_USER));
+ }
}
static void
diff --git a/drivers/net/dsa/mv88e6xxx/Kconfig b/drivers/net/dsa/mv88e6xxx/Kconfig
index 7a2445a34eb7..e3181d5471df 100644
--- a/drivers/net/dsa/mv88e6xxx/Kconfig
+++ b/drivers/net/dsa/mv88e6xxx/Kconfig
@@ -2,7 +2,6 @@
config NET_DSA_MV88E6XXX
tristate "Marvell 88E6xxx Ethernet switch fabric support"
depends on NET_DSA
- depends on PTP_1588_CLOCK_OPTIONAL
select IRQ_DOMAIN
select NET_DSA_TAG_EDSA
select NET_DSA_TAG_DSA
@@ -13,7 +12,8 @@ config NET_DSA_MV88E6XXX
config NET_DSA_MV88E6XXX_PTP
bool "PTP support for Marvell 88E6xxx"
default n
- depends on NET_DSA_MV88E6XXX && PTP_1588_CLOCK
+ depends on (NET_DSA_MV88E6XXX = y && PTP_1588_CLOCK = y) || \
+ (NET_DSA_MV88E6XXX = m && PTP_1588_CLOCK)
help
Say Y to enable PTP hardware timestamping on Marvell 88E6xxx switch
chips that support it.
diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c
index c5c3b4e92f28..2f224b166bbb 100644
--- a/drivers/net/dsa/qca/qca8k-8xxx.c
+++ b/drivers/net/dsa/qca/qca8k-8xxx.c
@@ -37,77 +37,104 @@ qca8k_split_addr(u32 regaddr, u16 *r1, u16 *r2, u16 *page)
}
static int
-qca8k_set_lo(struct qca8k_priv *priv, int phy_id, u32 regnum, u16 lo)
+qca8k_mii_write_lo(struct mii_bus *bus, int phy_id, u32 regnum, u32 val)
{
- u16 *cached_lo = &priv->mdio_cache.lo;
- struct mii_bus *bus = priv->bus;
int ret;
+ u16 lo;
- if (lo == *cached_lo)
- return 0;
-
+ lo = val & 0xffff;
ret = bus->write(bus, phy_id, regnum, lo);
if (ret < 0)
dev_err_ratelimited(&bus->dev,
"failed to write qca8k 32bit lo register\n");
- *cached_lo = lo;
- return 0;
+ return ret;
}
static int
-qca8k_set_hi(struct qca8k_priv *priv, int phy_id, u32 regnum, u16 hi)
+qca8k_mii_write_hi(struct mii_bus *bus, int phy_id, u32 regnum, u32 val)
{
- u16 *cached_hi = &priv->mdio_cache.hi;
- struct mii_bus *bus = priv->bus;
int ret;
+ u16 hi;
- if (hi == *cached_hi)
- return 0;
-
+ hi = (u16)(val >> 16);
ret = bus->write(bus, phy_id, regnum, hi);
if (ret < 0)
dev_err_ratelimited(&bus->dev,
"failed to write qca8k 32bit hi register\n");
- *cached_hi = hi;
- return 0;
+ return ret;
}
static int
-qca8k_mii_read32(struct mii_bus *bus, int phy_id, u32 regnum, u32 *val)
+qca8k_mii_read_lo(struct mii_bus *bus, int phy_id, u32 regnum, u32 *val)
{
int ret;
ret = bus->read(bus, phy_id, regnum);
- if (ret >= 0) {
- *val = ret;
- ret = bus->read(bus, phy_id, regnum + 1);
- *val |= ret << 16;
- }
+ if (ret < 0)
+ goto err;
- if (ret < 0) {
- dev_err_ratelimited(&bus->dev,
- "failed to read qca8k 32bit register\n");
- *val = 0;
- return ret;
- }
+ *val = ret & 0xffff;
+ return 0;
+
+err:
+ dev_err_ratelimited(&bus->dev,
+ "failed to read qca8k 32bit lo register\n");
+ *val = 0;
+
+ return ret;
+}
+static int
+qca8k_mii_read_hi(struct mii_bus *bus, int phy_id, u32 regnum, u32 *val)
+{
+ int ret;
+
+ ret = bus->read(bus, phy_id, regnum);
+ if (ret < 0)
+ goto err;
+
+ *val = ret << 16;
return 0;
+
+err:
+ dev_err_ratelimited(&bus->dev,
+ "failed to read qca8k 32bit hi register\n");
+ *val = 0;
+
+ return ret;
}
-static void
-qca8k_mii_write32(struct qca8k_priv *priv, int phy_id, u32 regnum, u32 val)
+static int
+qca8k_mii_read32(struct mii_bus *bus, int phy_id, u32 regnum, u32 *val)
{
- u16 lo, hi;
+ u32 hi, lo;
int ret;
- lo = val & 0xffff;
- hi = (u16)(val >> 16);
+ *val = 0;
- ret = qca8k_set_lo(priv, phy_id, regnum, lo);
- if (ret >= 0)
- ret = qca8k_set_hi(priv, phy_id, regnum + 1, hi);
+ ret = qca8k_mii_read_lo(bus, phy_id, regnum, &lo);
+ if (ret < 0)
+ goto err;
+
+ ret = qca8k_mii_read_hi(bus, phy_id, regnum + 1, &hi);
+ if (ret < 0)
+ goto err;
+
+ *val = lo | hi;
+
+err:
+ return ret;
+}
+
+static void
+qca8k_mii_write32(struct mii_bus *bus, int phy_id, u32 regnum, u32 val)
+{
+ if (qca8k_mii_write_lo(bus, phy_id, regnum, val) < 0)
+ return;
+
+ qca8k_mii_write_hi(bus, phy_id, regnum + 1, val);
}
static int
@@ -146,7 +173,16 @@ static void qca8k_rw_reg_ack_handler(struct dsa_switch *ds, struct sk_buff *skb)
command = get_unaligned_le32(&mgmt_ethhdr->command);
cmd = FIELD_GET(QCA_HDR_MGMT_CMD, command);
+
len = FIELD_GET(QCA_HDR_MGMT_LENGTH, command);
+ /* Special case for len of 15 as this is the max value for len and needs to
+ * be increased before converting it from word to dword.
+ */
+ if (len == 15)
+ len++;
+
+ /* We can ignore odd value, we always round up them in the alloc function. */
+ len *= sizeof(u16);
/* Make sure the seq match the requested packet */
if (get_unaligned_le32(&mgmt_ethhdr->seq) == mgmt_eth_data->seq)
@@ -193,17 +229,33 @@ static struct sk_buff *qca8k_alloc_mdio_header(enum mdio_cmd cmd, u32 reg, u32 *
if (!skb)
return NULL;
- /* Max value for len reg is 15 (0xf) but the switch actually return 16 byte
- * Actually for some reason the steps are:
- * 0: nothing
- * 1-4: first 4 byte
- * 5-6: first 12 byte
- * 7-15: all 16 byte
+ /* Hdr mgmt length value is in step of word size.
+ * As an example to process 4 byte of data the correct length to set is 2.
+ * To process 8 byte 4, 12 byte 6, 16 byte 8...
+ *
+ * Odd values will always return the next size on the ack packet.
+ * (length of 3 (6 byte) will always return 8 bytes of data)
+ *
+ * This means that a value of 15 (0xf) actually means reading/writing 32 bytes
+ * of data.
+ *
+ * To correctly calculate the length we devide the requested len by word and
+ * round up.
+ * On the ack function we can skip the odd check as we already handle the
+ * case here.
*/
- if (len == 16)
- real_len = 15;
- else
- real_len = len;
+ real_len = DIV_ROUND_UP(len, sizeof(u16));
+
+ /* We check if the result len is odd and we round up another time to
+ * the next size. (length of 3 will be increased to 4 as switch will always
+ * return 8 bytes)
+ */
+ if (real_len % sizeof(u16) != 0)
+ real_len++;
+
+ /* Max reg value is 0xf(15) but switch will always return the next size (32 byte) */
+ if (real_len == 16)
+ real_len--;
skb_reset_mac_header(skb);
skb_set_network_header(skb, skb->len);
@@ -417,7 +469,7 @@ qca8k_regmap_write(void *ctx, uint32_t reg, uint32_t val)
if (ret < 0)
goto exit;
- qca8k_mii_write32(priv, 0x10 | r2, r1, val);
+ qca8k_mii_write32(bus, 0x10 | r2, r1, val);
exit:
mutex_unlock(&bus->mdio_lock);
@@ -450,7 +502,7 @@ qca8k_regmap_update_bits(void *ctx, uint32_t reg, uint32_t mask, uint32_t write_
val &= ~mask;
val |= write_val;
- qca8k_mii_write32(priv, 0x10 | r2, r1, val);
+ qca8k_mii_write32(bus, 0x10 | r2, r1, val);
exit:
mutex_unlock(&bus->mdio_lock);
@@ -688,9 +740,9 @@ qca8k_mdio_busy_wait(struct mii_bus *bus, u32 reg, u32 mask)
qca8k_split_addr(reg, &r1, &r2, &page);
- ret = read_poll_timeout(qca8k_mii_read32, ret1, !(val & mask), 0,
+ ret = read_poll_timeout(qca8k_mii_read_hi, ret1, !(val & mask), 0,
QCA8K_BUSY_WAIT_TIMEOUT * USEC_PER_MSEC, false,
- bus, 0x10 | r2, r1, &val);
+ bus, 0x10 | r2, r1 + 1, &val);
/* Check if qca8k_read has failed for a different reason
* before returnting -ETIMEDOUT
@@ -725,14 +777,14 @@ qca8k_mdio_write(struct qca8k_priv *priv, int phy, int regnum, u16 data)
if (ret)
goto exit;
- qca8k_mii_write32(priv, 0x10 | r2, r1, val);
+ qca8k_mii_write32(bus, 0x10 | r2, r1, val);
ret = qca8k_mdio_busy_wait(bus, QCA8K_MDIO_MASTER_CTRL,
QCA8K_MDIO_MASTER_BUSY);
exit:
/* even if the busy_wait timeouts try to clear the MASTER_EN */
- qca8k_mii_write32(priv, 0x10 | r2, r1, 0);
+ qca8k_mii_write_hi(bus, 0x10 | r2, r1 + 1, 0);
mutex_unlock(&bus->mdio_lock);
@@ -762,18 +814,18 @@ qca8k_mdio_read(struct qca8k_priv *priv, int phy, int regnum)
if (ret)
goto exit;
- qca8k_mii_write32(priv, 0x10 | r2, r1, val);
+ qca8k_mii_write_hi(bus, 0x10 | r2, r1 + 1, val);
ret = qca8k_mdio_busy_wait(bus, QCA8K_MDIO_MASTER_CTRL,
QCA8K_MDIO_MASTER_BUSY);
if (ret)
goto exit;
- ret = qca8k_mii_read32(bus, 0x10 | r2, r1, &val);
+ ret = qca8k_mii_read_lo(bus, 0x10 | r2, r1, &val);
exit:
/* even if the busy_wait timeouts try to clear the MASTER_EN */
- qca8k_mii_write32(priv, 0x10 | r2, r1, 0);
+ qca8k_mii_write_hi(bus, 0x10 | r2, r1 + 1, 0);
mutex_unlock(&bus->mdio_lock);
@@ -1943,8 +1995,6 @@ qca8k_sw_probe(struct mdio_device *mdiodev)
}
priv->mdio_cache.page = 0xffff;
- priv->mdio_cache.lo = 0xffff;
- priv->mdio_cache.hi = 0xffff;
/* Check the detected switch id */
ret = qca8k_read_switch_id(priv);
diff --git a/drivers/net/dsa/qca/qca8k.h b/drivers/net/dsa/qca/qca8k.h
index 0b7a5cb12321..03514f7a20be 100644
--- a/drivers/net/dsa/qca/qca8k.h
+++ b/drivers/net/dsa/qca/qca8k.h
@@ -375,11 +375,6 @@ struct qca8k_mdio_cache {
* mdio writes
*/
u16 page;
-/* lo and hi can also be cached and from Documentation we can skip one
- * extra mdio write if lo or hi is didn't change.
- */
- u16 lo;
- u16 hi;
};
struct qca8k_pcs {
diff --git a/drivers/net/ethernet/adi/adin1110.c b/drivers/net/ethernet/adi/adin1110.c
index 0805f249fff2..c26b8597945b 100644
--- a/drivers/net/ethernet/adi/adin1110.c
+++ b/drivers/net/ethernet/adi/adin1110.c
@@ -356,7 +356,7 @@ static int adin1110_read_fifo(struct adin1110_port_priv *port_priv)
if ((port_priv->flags & IFF_ALLMULTI && rxb->pkt_type == PACKET_MULTICAST) ||
(port_priv->flags & IFF_BROADCAST && rxb->pkt_type == PACKET_BROADCAST))
- rxb->offload_fwd_mark = 1;
+ rxb->offload_fwd_mark = port_priv->priv->forwarding;
netif_rx(rxb);
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index 8c8b4c88c7de..451c3a1b6255 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -2400,29 +2400,18 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev,
return -EOPNOTSUPP;
}
- switch (func) {
- case ENA_ADMIN_TOEPLITZ:
- if (key) {
- if (key_len != sizeof(hash_key->key)) {
- netdev_err(ena_dev->net_device,
- "key len (%u) doesn't equal the supported size (%zu)\n",
- key_len, sizeof(hash_key->key));
- return -EINVAL;
- }
- memcpy(hash_key->key, key, key_len);
- rss->hash_init_val = init_val;
- hash_key->key_parts = key_len / sizeof(hash_key->key[0]);
+ if ((func == ENA_ADMIN_TOEPLITZ) && key) {
+ if (key_len != sizeof(hash_key->key)) {
+ netdev_err(ena_dev->net_device,
+ "key len (%u) doesn't equal the supported size (%zu)\n",
+ key_len, sizeof(hash_key->key));
+ return -EINVAL;
}
- break;
- case ENA_ADMIN_CRC32:
- rss->hash_init_val = init_val;
- break;
- default:
- netdev_err(ena_dev->net_device, "Invalid hash function (%d)\n",
- func);
- return -EINVAL;
+ memcpy(hash_key->key, key, key_len);
+ hash_key->key_parts = key_len / sizeof(hash_key->key[0]);
}
+ rss->hash_init_val = init_val;
old_func = rss->hash_func;
rss->hash_func = func;
rc = ena_com_set_hash_function(ena_dev);
diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
index 48ae6d810f8f..8da79eedc057 100644
--- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c
+++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
@@ -887,11 +887,7 @@ static int ena_set_tunable(struct net_device *netdev,
switch (tuna->id) {
case ETHTOOL_RX_COPYBREAK:
len = *(u32 *)data;
- if (len > adapter->netdev->mtu) {
- ret = -EINVAL;
- break;
- }
- adapter->rx_copybreak = len;
+ ret = ena_set_rx_copybreak(adapter, len);
break;
default:
ret = -EINVAL;
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index a95529a69cbb..e8ad5ea31aff 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -374,9 +374,9 @@ static int ena_xdp_xmit(struct net_device *dev, int n,
static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp)
{
+ u32 verdict = ENA_XDP_PASS;
struct bpf_prog *xdp_prog;
struct ena_ring *xdp_ring;
- u32 verdict = XDP_PASS;
struct xdp_frame *xdpf;
u64 *xdp_stat;
@@ -393,7 +393,7 @@ static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp)
if (unlikely(!xdpf)) {
trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
xdp_stat = &rx_ring->rx_stats.xdp_aborted;
- verdict = XDP_ABORTED;
+ verdict = ENA_XDP_DROP;
break;
}
@@ -409,29 +409,35 @@ static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp)
spin_unlock(&xdp_ring->xdp_tx_lock);
xdp_stat = &rx_ring->rx_stats.xdp_tx;
+ verdict = ENA_XDP_TX;
break;
case XDP_REDIRECT:
if (likely(!xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog))) {
xdp_stat = &rx_ring->rx_stats.xdp_redirect;
+ verdict = ENA_XDP_REDIRECT;
break;
}
trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
xdp_stat = &rx_ring->rx_stats.xdp_aborted;
- verdict = XDP_ABORTED;
+ verdict = ENA_XDP_DROP;
break;
case XDP_ABORTED:
trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
xdp_stat = &rx_ring->rx_stats.xdp_aborted;
+ verdict = ENA_XDP_DROP;
break;
case XDP_DROP:
xdp_stat = &rx_ring->rx_stats.xdp_drop;
+ verdict = ENA_XDP_DROP;
break;
case XDP_PASS:
xdp_stat = &rx_ring->rx_stats.xdp_pass;
+ verdict = ENA_XDP_PASS;
break;
default:
bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, verdict);
xdp_stat = &rx_ring->rx_stats.xdp_invalid;
+ verdict = ENA_XDP_DROP;
}
ena_increase_stat(xdp_stat, 1, &rx_ring->syncp);
@@ -512,16 +518,18 @@ static void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter,
struct bpf_prog *prog,
int first, int count)
{
+ struct bpf_prog *old_bpf_prog;
struct ena_ring *rx_ring;
int i = 0;
for (i = first; i < count; i++) {
rx_ring = &adapter->rx_ring[i];
- xchg(&rx_ring->xdp_bpf_prog, prog);
- if (prog) {
+ old_bpf_prog = xchg(&rx_ring->xdp_bpf_prog, prog);
+
+ if (!old_bpf_prog && prog) {
ena_xdp_register_rxq_info(rx_ring);
rx_ring->rx_headroom = XDP_PACKET_HEADROOM;
- } else {
+ } else if (old_bpf_prog && !prog) {
ena_xdp_unregister_rxq_info(rx_ring);
rx_ring->rx_headroom = NET_SKB_PAD;
}
@@ -672,6 +680,7 @@ static void ena_init_io_rings_common(struct ena_adapter *adapter,
ring->ena_dev = adapter->ena_dev;
ring->per_napi_packets = 0;
ring->cpu = 0;
+ ring->numa_node = 0;
ring->no_interrupt_event_cnt = 0;
u64_stats_init(&ring->syncp);
}
@@ -775,6 +784,7 @@ static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
tx_ring->next_to_use = 0;
tx_ring->next_to_clean = 0;
tx_ring->cpu = ena_irq->cpu;
+ tx_ring->numa_node = node;
return 0;
err_push_buf_intermediate_buf:
@@ -907,6 +917,7 @@ static int ena_setup_rx_resources(struct ena_adapter *adapter,
rx_ring->next_to_clean = 0;
rx_ring->next_to_use = 0;
rx_ring->cpu = ena_irq->cpu;
+ rx_ring->numa_node = node;
return 0;
}
@@ -1619,12 +1630,12 @@ static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp)
* we expect, then we simply drop it
*/
if (unlikely(rx_ring->ena_bufs[0].len > ENA_XDP_MAX_MTU))
- return XDP_DROP;
+ return ENA_XDP_DROP;
ret = ena_xdp_execute(rx_ring, xdp);
/* The xdp program might expand the headers */
- if (ret == XDP_PASS) {
+ if (ret == ENA_XDP_PASS) {
rx_info->page_offset = xdp->data - xdp->data_hard_start;
rx_ring->ena_bufs[0].len = xdp->data_end - xdp->data;
}
@@ -1663,7 +1674,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
xdp_init_buff(&xdp, ENA_PAGE_SIZE, &rx_ring->xdp_rxq);
do {
- xdp_verdict = XDP_PASS;
+ xdp_verdict = ENA_XDP_PASS;
skb = NULL;
ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
ena_rx_ctx.max_bufs = rx_ring->sgl_size;
@@ -1691,7 +1702,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp);
/* allocate skb and fill it */
- if (xdp_verdict == XDP_PASS)
+ if (xdp_verdict == ENA_XDP_PASS)
skb = ena_rx_skb(rx_ring,
rx_ring->ena_bufs,
ena_rx_ctx.descs,
@@ -1709,14 +1720,15 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
/* Packets was passed for transmission, unmap it
* from RX side.
*/
- if (xdp_verdict == XDP_TX || xdp_verdict == XDP_REDIRECT) {
+ if (xdp_verdict & ENA_XDP_FORWARDED) {
ena_unmap_rx_buff(rx_ring,
&rx_ring->rx_buffer_info[req_id]);
rx_ring->rx_buffer_info[req_id].page = NULL;
}
}
- if (xdp_verdict != XDP_PASS) {
+ if (xdp_verdict != ENA_XDP_PASS) {
xdp_flags |= xdp_verdict;
+ total_len += ena_rx_ctx.ena_bufs[0].len;
res_budget--;
continue;
}
@@ -1760,7 +1772,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
ena_refill_rx_bufs(rx_ring, refill_required);
}
- if (xdp_flags & XDP_REDIRECT)
+ if (xdp_flags & ENA_XDP_REDIRECT)
xdp_do_flush_map();
return work_done;
@@ -1814,8 +1826,9 @@ static void ena_adjust_adaptive_rx_intr_moderation(struct ena_napi *ena_napi)
static void ena_unmask_interrupt(struct ena_ring *tx_ring,
struct ena_ring *rx_ring)
{
+ u32 rx_interval = tx_ring->smoothed_interval;
struct ena_eth_io_intr_reg intr_reg;
- u32 rx_interval = 0;
+
/* Rx ring can be NULL when for XDP tx queues which don't have an
* accompanying rx_ring pair.
*/
@@ -1853,20 +1866,27 @@ static void ena_update_ring_numa_node(struct ena_ring *tx_ring,
if (likely(tx_ring->cpu == cpu))
goto out;
+ tx_ring->cpu = cpu;
+ if (rx_ring)
+ rx_ring->cpu = cpu;
+
numa_node = cpu_to_node(cpu);
+
+ if (likely(tx_ring->numa_node == numa_node))
+ goto out;
+
put_cpu();
if (numa_node != NUMA_NO_NODE) {
ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node);
- if (rx_ring)
+ tx_ring->numa_node = numa_node;
+ if (rx_ring) {
+ rx_ring->numa_node = numa_node;
ena_com_update_numa_node(rx_ring->ena_com_io_cq,
numa_node);
+ }
}
- tx_ring->cpu = cpu;
- if (rx_ring)
- rx_ring->cpu = cpu;
-
return;
out:
put_cpu();
@@ -1987,11 +2007,10 @@ static int ena_io_poll(struct napi_struct *napi, int budget)
if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev))
ena_adjust_adaptive_rx_intr_moderation(ena_napi);
+ ena_update_ring_numa_node(tx_ring, rx_ring);
ena_unmask_interrupt(tx_ring, rx_ring);
}
- ena_update_ring_numa_node(tx_ring, rx_ring);
-
ret = rx_work_done;
} else {
ret = budget;
@@ -2376,7 +2395,7 @@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid)
ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
ctx.msix_vector = msix_vector;
ctx.queue_size = tx_ring->ring_size;
- ctx.numa_node = cpu_to_node(tx_ring->cpu);
+ ctx.numa_node = tx_ring->numa_node;
rc = ena_com_create_io_queue(ena_dev, &ctx);
if (rc) {
@@ -2444,7 +2463,7 @@ static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid)
ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
ctx.msix_vector = msix_vector;
ctx.queue_size = rx_ring->ring_size;
- ctx.numa_node = cpu_to_node(rx_ring->cpu);
+ ctx.numa_node = rx_ring->numa_node;
rc = ena_com_create_io_queue(ena_dev, &ctx);
if (rc) {
@@ -2805,6 +2824,24 @@ int ena_update_queue_sizes(struct ena_adapter *adapter,
return dev_was_up ? ena_up(adapter) : 0;
}
+int ena_set_rx_copybreak(struct ena_adapter *adapter, u32 rx_copybreak)
+{
+ struct ena_ring *rx_ring;
+ int i;
+
+ if (rx_copybreak > min_t(u16, adapter->netdev->mtu, ENA_PAGE_SIZE))
+ return -EINVAL;
+
+ adapter->rx_copybreak = rx_copybreak;
+
+ for (i = 0; i < adapter->num_io_queues; i++) {
+ rx_ring = &adapter->rx_ring[i];
+ rx_ring->rx_copybreak = rx_copybreak;
+ }
+
+ return 0;
+}
+
int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count)
{
struct ena_com_dev *ena_dev = adapter->ena_dev;
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
index 1bdce99bf688..2cb141079474 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -262,9 +262,11 @@ struct ena_ring {
bool disable_meta_caching;
u16 no_interrupt_event_cnt;
- /* cpu for TPH */
+ /* cpu and NUMA for TPH */
int cpu;
- /* number of tx/rx_buffer_info's entries */
+ int numa_node;
+
+ /* number of tx/rx_buffer_info's entries */
int ring_size;
enum ena_admin_placement_policy_type tx_mem_queue_type;
@@ -392,6 +394,8 @@ int ena_update_queue_sizes(struct ena_adapter *adapter,
int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count);
+int ena_set_rx_copybreak(struct ena_adapter *adapter, u32 rx_copybreak);
+
int ena_get_sset_count(struct net_device *netdev, int sset);
static inline void ena_reset_device(struct ena_adapter *adapter,
@@ -409,6 +413,15 @@ enum ena_xdp_errors_t {
ENA_XDP_NO_ENOUGH_QUEUES,
};
+enum ENA_XDP_ACTIONS {
+ ENA_XDP_PASS = 0,
+ ENA_XDP_TX = BIT(0),
+ ENA_XDP_REDIRECT = BIT(1),
+ ENA_XDP_DROP = BIT(2)
+};
+
+#define ENA_XDP_FORWARDED (ENA_XDP_TX | ENA_XDP_REDIRECT)
+
static inline bool ena_xdp_present(struct ena_adapter *adapter)
{
return !!adapter->xdp_bpf_prog;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
index 3936543a74d8..4030d619e84f 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
@@ -524,19 +524,28 @@ static void xgbe_disable_vxlan(struct xgbe_prv_data *pdata)
netif_dbg(pdata, drv, pdata->netdev, "VXLAN acceleration disabled\n");
}
+static unsigned int xgbe_get_fc_queue_count(struct xgbe_prv_data *pdata)
+{
+ unsigned int max_q_count = XGMAC_MAX_FLOW_CONTROL_QUEUES;
+
+ /* From MAC ver 30H the TFCR is per priority, instead of per queue */
+ if (XGMAC_GET_BITS(pdata->hw_feat.version, MAC_VR, SNPSVER) >= 0x30)
+ return max_q_count;
+ else
+ return min_t(unsigned int, pdata->tx_q_count, max_q_count);
+}
+
static int xgbe_disable_tx_flow_control(struct xgbe_prv_data *pdata)
{
- unsigned int max_q_count, q_count;
unsigned int reg, reg_val;
- unsigned int i;
+ unsigned int i, q_count;
/* Clear MTL flow control */
for (i = 0; i < pdata->rx_q_count; i++)
XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQOMR, EHFC, 0);
/* Clear MAC flow control */
- max_q_count = XGMAC_MAX_FLOW_CONTROL_QUEUES;
- q_count = min_t(unsigned int, pdata->tx_q_count, max_q_count);
+ q_count = xgbe_get_fc_queue_count(pdata);
reg = MAC_Q0TFCR;
for (i = 0; i < q_count; i++) {
reg_val = XGMAC_IOREAD(pdata, reg);
@@ -553,9 +562,8 @@ static int xgbe_enable_tx_flow_control(struct xgbe_prv_data *pdata)
{
struct ieee_pfc *pfc = pdata->pfc;
struct ieee_ets *ets = pdata->ets;
- unsigned int max_q_count, q_count;
unsigned int reg, reg_val;
- unsigned int i;
+ unsigned int i, q_count;
/* Set MTL flow control */
for (i = 0; i < pdata->rx_q_count; i++) {
@@ -579,8 +587,7 @@ static int xgbe_enable_tx_flow_control(struct xgbe_prv_data *pdata)
}
/* Set MAC flow control */
- max_q_count = XGMAC_MAX_FLOW_CONTROL_QUEUES;
- q_count = min_t(unsigned int, pdata->tx_q_count, max_q_count);
+ q_count = xgbe_get_fc_queue_count(pdata);
reg = MAC_Q0TFCR;
for (i = 0; i < q_count; i++) {
reg_val = XGMAC_IOREAD(pdata, reg);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 7b666106feee..614c0278419b 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1064,6 +1064,9 @@ static void xgbe_free_irqs(struct xgbe_prv_data *pdata)
devm_free_irq(pdata->dev, pdata->dev_irq, pdata);
+ tasklet_kill(&pdata->tasklet_dev);
+ tasklet_kill(&pdata->tasklet_ecc);
+
if (pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq))
devm_free_irq(pdata->dev, pdata->ecc_irq, pdata);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
index 22d4fc547a0a..a9ccc4258ee5 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
@@ -447,8 +447,10 @@ static void xgbe_i2c_stop(struct xgbe_prv_data *pdata)
xgbe_i2c_disable(pdata);
xgbe_i2c_clear_all_interrupts(pdata);
- if (pdata->dev_irq != pdata->i2c_irq)
+ if (pdata->dev_irq != pdata->i2c_irq) {
devm_free_irq(pdata->dev, pdata->i2c_irq, pdata);
+ tasklet_kill(&pdata->tasklet_i2c);
+ }
}
static int xgbe_i2c_start(struct xgbe_prv_data *pdata)
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
index 4e97b4869522..43fdd111235a 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
@@ -496,6 +496,7 @@ static enum xgbe_an xgbe_an73_tx_training(struct xgbe_prv_data *pdata,
reg |= XGBE_KR_TRAINING_ENABLE;
reg |= XGBE_KR_TRAINING_START;
XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg);
+ pdata->kr_start_time = jiffies;
netif_dbg(pdata, link, pdata->netdev,
"KR training initiated\n");
@@ -632,6 +633,8 @@ static enum xgbe_an xgbe_an73_incompat_link(struct xgbe_prv_data *pdata)
xgbe_switch_mode(pdata);
+ pdata->an_result = XGBE_AN_READY;
+
xgbe_an_restart(pdata);
return XGBE_AN_INCOMPAT_LINK;
@@ -1275,9 +1278,30 @@ static bool xgbe_phy_aneg_done(struct xgbe_prv_data *pdata)
static void xgbe_check_link_timeout(struct xgbe_prv_data *pdata)
{
unsigned long link_timeout;
+ unsigned long kr_time;
+ int wait;
link_timeout = pdata->link_check + (XGBE_LINK_TIMEOUT * HZ);
if (time_after(jiffies, link_timeout)) {
+ if ((xgbe_cur_mode(pdata) == XGBE_MODE_KR) &&
+ pdata->phy.autoneg == AUTONEG_ENABLE) {
+ /* AN restart should not happen while KR training is in progress.
+ * The while loop ensures no AN restart during KR training,
+ * waits up to 500ms and AN restart is triggered only if KR
+ * training is failed.
+ */
+ wait = XGBE_KR_TRAINING_WAIT_ITER;
+ while (wait--) {
+ kr_time = pdata->kr_start_time +
+ msecs_to_jiffies(XGBE_AN_MS_TIMEOUT);
+ if (time_after(jiffies, kr_time))
+ break;
+ /* AN restart is not required, if AN result is COMPLETE */
+ if (pdata->an_result == XGBE_AN_COMPLETE)
+ return;
+ usleep_range(10000, 11000);
+ }
+ }
netif_dbg(pdata, link, pdata->netdev, "AN link timeout\n");
xgbe_phy_config_aneg(pdata);
}
@@ -1390,8 +1414,10 @@ static void xgbe_phy_stop(struct xgbe_prv_data *pdata)
/* Disable auto-negotiation */
xgbe_an_disable_all(pdata);
- if (pdata->dev_irq != pdata->an_irq)
+ if (pdata->dev_irq != pdata->an_irq) {
devm_free_irq(pdata->dev, pdata->an_irq, pdata);
+ tasklet_kill(&pdata->tasklet_an);
+ }
pdata->phy_if.phy_impl.stop(pdata);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index 71f24cb47935..7a41367c437d 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -290,6 +290,7 @@
/* Auto-negotiation */
#define XGBE_AN_MS_TIMEOUT 500
#define XGBE_LINK_TIMEOUT 5
+#define XGBE_KR_TRAINING_WAIT_ITER 50
#define XGBE_SGMII_AN_LINK_STATUS BIT(1)
#define XGBE_SGMII_AN_LINK_SPEED (BIT(2) | BIT(3))
@@ -1280,6 +1281,7 @@ struct xgbe_prv_data {
unsigned int parallel_detect;
unsigned int fec_ability;
unsigned long an_start;
+ unsigned long kr_start_time;
enum xgbe_an_mode an_mode;
/* I2C support */
diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
index d91fdb0c2649..2cf96892e565 100644
--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
@@ -2784,17 +2784,11 @@ static int bcm_enet_shared_probe(struct platform_device *pdev)
return 0;
}
-static int bcm_enet_shared_remove(struct platform_device *pdev)
-{
- return 0;
-}
-
/* this "shared" driver is needed because both macs share a single
* address space
*/
struct platform_driver bcm63xx_enet_shared_driver = {
.probe = bcm_enet_shared_probe,
- .remove = bcm_enet_shared_remove,
.driver = {
.name = "bcm63xx_enet_shared",
.owner = THIS_MODULE,
diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c b/drivers/net/ethernet/broadcom/bgmac-bcma.c
index 02bd3cf9a260..6e4f36aaf5db 100644
--- a/drivers/net/ethernet/broadcom/bgmac-bcma.c
+++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c
@@ -240,12 +240,12 @@ static int bgmac_probe(struct bcma_device *core)
bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
bgmac->feature_flags |= BGMAC_FEAT_FLW_CTRL1;
bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_PHY;
- if (ci->pkg == BCMA_PKG_ID_BCM47188 ||
- ci->pkg == BCMA_PKG_ID_BCM47186) {
+ if ((ci->id == BCMA_CHIP_ID_BCM5357 && ci->pkg == BCMA_PKG_ID_BCM47186) ||
+ (ci->id == BCMA_CHIP_ID_BCM53572 && ci->pkg == BCMA_PKG_ID_BCM47188)) {
bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_RGMII;
bgmac->feature_flags |= BGMAC_FEAT_IOST_ATTACHED;
}
- if (ci->pkg == BCMA_PKG_ID_BCM5358)
+ if (ci->id == BCMA_CHIP_ID_BCM5357 && ci->pkg == BCMA_PKG_ID_BCM5358)
bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_EPHYRMII;
break;
case BCMA_CHIP_ID_BCM53573:
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 4c7d07c684c4..6c32f5c427b5 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -991,10 +991,9 @@ static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp,
dma_addr -= bp->rx_dma_offset;
dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir,
DMA_ATTR_WEAK_ORDERING);
- skb = build_skb(page_address(page), BNXT_PAGE_MODE_BUF_SIZE +
- bp->rx_dma_offset);
+ skb = build_skb(page_address(page), PAGE_SIZE);
if (!skb) {
- __free_page(page);
+ page_pool_recycle_direct(rxr->page_pool, page);
return NULL;
}
skb_mark_for_recycle(skb);
@@ -1032,7 +1031,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp,
skb = napi_alloc_skb(&rxr->bnapi->napi, payload);
if (!skb) {
- __free_page(page);
+ page_pool_recycle_direct(rxr->page_pool, page);
return NULL;
}
@@ -1925,7 +1924,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
dma_addr = rx_buf->mapping;
if (bnxt_xdp_attached(bp, rxr)) {
- bnxt_xdp_buff_init(bp, rxr, cons, &data_ptr, &len, &xdp);
+ bnxt_xdp_buff_init(bp, rxr, cons, data_ptr, len, &xdp);
if (agg_bufs) {
u32 frag_len = bnxt_rx_agg_pages_xdp(bp, cpr, &xdp,
cp_cons, agg_bufs,
@@ -1940,7 +1939,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
}
if (xdp_active) {
- if (bnxt_rx_xdp(bp, rxr, cons, xdp, data, &len, event)) {
+ if (bnxt_rx_xdp(bp, rxr, cons, xdp, data, &data_ptr, &len, event)) {
rc = 1;
goto next_rx;
}
@@ -3969,8 +3968,10 @@ void bnxt_set_ring_params(struct bnxt *bp)
bp->rx_agg_ring_mask = (bp->rx_agg_nr_pages * RX_DESC_CNT) - 1;
if (BNXT_RX_PAGE_MODE(bp)) {
- rx_space = BNXT_PAGE_MODE_BUF_SIZE;
- rx_size = BNXT_MAX_PAGE_MODE_MTU;
+ rx_space = PAGE_SIZE;
+ rx_size = PAGE_SIZE -
+ ALIGN(max(NET_SKB_PAD, XDP_PACKET_HEADROOM), 8) -
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
} else {
rx_size = SKB_DATA_ALIGN(BNXT_RX_COPY_THRESH + NET_IP_ALIGN);
rx_space = rx_size + NET_SKB_PAD +
@@ -5398,15 +5399,16 @@ static int bnxt_hwrm_vnic_set_hds(struct bnxt *bp, u16 vnic_id)
req->flags = cpu_to_le32(VNIC_PLCMODES_CFG_REQ_FLAGS_JUMBO_PLACEMENT);
req->enables = cpu_to_le32(VNIC_PLCMODES_CFG_REQ_ENABLES_JUMBO_THRESH_VALID);
- if (BNXT_RX_PAGE_MODE(bp) && !BNXT_RX_JUMBO_MODE(bp)) {
+ if (BNXT_RX_PAGE_MODE(bp)) {
+ req->jumbo_thresh = cpu_to_le16(bp->rx_buf_use_size);
+ } else {
req->flags |= cpu_to_le32(VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV4 |
VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV6);
req->enables |=
cpu_to_le32(VNIC_PLCMODES_CFG_REQ_ENABLES_HDS_THRESHOLD_VALID);
+ req->jumbo_thresh = cpu_to_le16(bp->rx_copy_thresh);
+ req->hds_threshold = cpu_to_le16(bp->rx_copy_thresh);
}
- /* thresholds not implemented in firmware yet */
- req->jumbo_thresh = cpu_to_le16(bp->rx_copy_thresh);
- req->hds_threshold = cpu_to_le16(bp->rx_copy_thresh);
req->vnic_id = cpu_to_le32(vnic->fw_vnic_id);
return hwrm_req_send(bp, req);
}
@@ -9272,10 +9274,14 @@ int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init)
netdev_err(bp->dev, "ring reservation/IRQ init failure rc: %d\n", rc);
return rc;
}
- if (tcs && (bp->tx_nr_rings_per_tc * tcs != bp->tx_nr_rings)) {
+ if (tcs && (bp->tx_nr_rings_per_tc * tcs !=
+ bp->tx_nr_rings - bp->tx_nr_rings_xdp)) {
netdev_err(bp->dev, "tx ring reservation failure\n");
netdev_reset_tc(bp->dev);
- bp->tx_nr_rings_per_tc = bp->tx_nr_rings;
+ if (bp->tx_nr_rings_xdp)
+ bp->tx_nr_rings_per_tc = bp->tx_nr_rings_xdp;
+ else
+ bp->tx_nr_rings_per_tc = bp->tx_nr_rings;
return -ENOMEM;
}
return 0;
@@ -13591,7 +13597,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
return -ENOMEM;
bp = netdev_priv(dev);
- SET_NETDEV_DEVLINK_PORT(dev, &bp->dl_port);
bp->board_idx = ent->driver_data;
bp->msg_enable = BNXT_DEF_MSG_ENABLE;
bnxt_set_max_func_irqs(bp, max_irqs);
@@ -13599,6 +13604,10 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (bnxt_vf_pciid(bp->board_idx))
bp->flags |= BNXT_FLAG_VF;
+ /* No devlink port registration in case of a VF */
+ if (BNXT_PF(bp))
+ SET_NETDEV_DEVLINK_PORT(dev, &bp->dl_port);
+
if (pdev->msix_cap)
bp->flags |= BNXT_FLAG_MSIX_CAP;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 41c6dd0ae447..5163ef4a49ea 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -591,12 +591,20 @@ struct nqe_cn {
#define BNXT_RX_PAGE_SIZE (1 << BNXT_RX_PAGE_SHIFT)
#define BNXT_MAX_MTU 9500
-#define BNXT_PAGE_MODE_BUF_SIZE \
+
+/* First RX buffer page in XDP multi-buf mode
+ *
+ * +-------------------------------------------------------------------------+
+ * | XDP_PACKET_HEADROOM | bp->rx_buf_use_size | skb_shared_info|
+ * | (bp->rx_dma_offset) | | |
+ * +-------------------------------------------------------------------------+
+ */
+#define BNXT_MAX_PAGE_MODE_MTU_SBUF \
((unsigned int)PAGE_SIZE - VLAN_ETH_HLEN - NET_IP_ALIGN - \
XDP_PACKET_HEADROOM)
#define BNXT_MAX_PAGE_MODE_MTU \
- BNXT_PAGE_MODE_BUF_SIZE - \
- SKB_DATA_ALIGN((unsigned int)sizeof(struct skb_shared_info))
+ (BNXT_MAX_PAGE_MODE_MTU_SBUF - \
+ SKB_DATA_ALIGN((unsigned int)sizeof(struct skb_shared_info)))
#define BNXT_MIN_PKT_SIZE 52
@@ -2134,7 +2142,6 @@ struct bnxt {
#define BNXT_DUMP_CRASH 1
struct bpf_prog *xdp_prog;
- u8 xdp_has_frags;
struct bnxt_ptp_cfg *ptp_cfg;
u8 ptp_all_rx_tstamp;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index cbf17fcfb7ab..ec573127b707 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -3969,7 +3969,7 @@ void bnxt_ethtool_init(struct bnxt *bp)
test_info->timeout = HWRM_CMD_TIMEOUT;
for (i = 0; i < bp->num_tests; i++) {
char *str = test_info->string[i];
- char *fw_str = resp->test0_name + i * 32;
+ char *fw_str = resp->test_name[i];
if (i == BNXT_MACLPBK_TEST_IDX) {
strcpy(str, "Mac loopback test (offline)");
@@ -3980,14 +3980,9 @@ void bnxt_ethtool_init(struct bnxt *bp)
} else if (i == BNXT_IRQ_TEST_IDX) {
strcpy(str, "Interrupt_test (offline)");
} else {
- strscpy(str, fw_str, ETH_GSTRING_LEN);
- strncat(str, " test", ETH_GSTRING_LEN - strlen(str));
- if (test_info->offline_mask & (1 << i))
- strncat(str, " (offline)",
- ETH_GSTRING_LEN - strlen(str));
- else
- strncat(str, " (online)",
- ETH_GSTRING_LEN - strlen(str));
+ snprintf(str, ETH_GSTRING_LEN, "%s test (%s)",
+ fw_str, test_info->offline_mask & (1 << i) ?
+ "offline" : "online");
}
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
index 2686a714a59f..a5408879e077 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
@@ -10249,14 +10249,7 @@ struct hwrm_selftest_qlist_output {
u8 unused_0;
__le16 test_timeout;
u8 unused_1[2];
- char test0_name[32];
- char test1_name[32];
- char test2_name[32];
- char test3_name[32];
- char test4_name[32];
- char test5_name[32];
- char test6_name[32];
- char test7_name[32];
+ char test_name[8][32];
u8 eyescope_target_BER_support;
#define SELFTEST_QLIST_RESP_EYESCOPE_TARGET_BER_SUPPORT_BER_1E8_SUPPORTED 0x0UL
#define SELFTEST_QLIST_RESP_EYESCOPE_TARGET_BER_SUPPORT_BER_1E9_SUPPORTED 0x1UL
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index c3065ec0a479..36d5202c0aee 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -177,7 +177,7 @@ bool bnxt_xdp_attached(struct bnxt *bp, struct bnxt_rx_ring_info *rxr)
}
void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
- u16 cons, u8 **data_ptr, unsigned int *len,
+ u16 cons, u8 *data_ptr, unsigned int len,
struct xdp_buff *xdp)
{
struct bnxt_sw_rx_bd *rx_buf;
@@ -191,13 +191,10 @@ void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
offset = bp->rx_offset;
mapping = rx_buf->mapping - bp->rx_dma_offset;
- dma_sync_single_for_cpu(&pdev->dev, mapping + offset, *len, bp->rx_dir);
-
- if (bp->xdp_has_frags)
- buflen = BNXT_PAGE_MODE_BUF_SIZE + offset;
+ dma_sync_single_for_cpu(&pdev->dev, mapping + offset, len, bp->rx_dir);
xdp_init_buff(xdp, buflen, &rxr->xdp_rxq);
- xdp_prepare_buff(xdp, *data_ptr - offset, offset, *len, false);
+ xdp_prepare_buff(xdp, data_ptr - offset, offset, len, false);
}
void bnxt_xdp_buff_frags_free(struct bnxt_rx_ring_info *rxr,
@@ -222,7 +219,8 @@ void bnxt_xdp_buff_frags_free(struct bnxt_rx_ring_info *rxr,
* false - packet should be passed to the stack.
*/
bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
- struct xdp_buff xdp, struct page *page, unsigned int *len, u8 *event)
+ struct xdp_buff xdp, struct page *page, u8 **data_ptr,
+ unsigned int *len, u8 *event)
{
struct bpf_prog *xdp_prog = READ_ONCE(rxr->xdp_prog);
struct bnxt_tx_ring_info *txr;
@@ -255,8 +253,10 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
*event &= ~BNXT_RX_EVENT;
*len = xdp.data_end - xdp.data;
- if (orig_data != xdp.data)
+ if (orig_data != xdp.data) {
offset = xdp.data - xdp.data_hard_start;
+ *data_ptr = xdp.data_hard_start + offset;
+ }
switch (act) {
case XDP_PASS:
@@ -401,10 +401,8 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog)
netdev_warn(dev, "ethtool rx/tx channels must be combined to support XDP.\n");
return -EOPNOTSUPP;
}
- if (prog) {
+ if (prog)
tx_xdp = bp->rx_nr_rings;
- bp->xdp_has_frags = prog->aux->xdp_has_frags;
- }
tc = netdev_get_num_tc(dev);
if (!tc)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
index 505911ae095d..ea430d6961df 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
@@ -18,8 +18,8 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp,
struct xdp_buff *xdp);
void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts);
bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
- struct xdp_buff xdp, struct page *page, unsigned int *len,
- u8 *event);
+ struct xdp_buff xdp, struct page *page, u8 **data_ptr,
+ unsigned int *len, u8 *event);
int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp);
int bnxt_xdp_xmit(struct net_device *dev, int num_frames,
struct xdp_frame **frames, u32 flags);
@@ -27,7 +27,7 @@ int bnxt_xdp_xmit(struct net_device *dev, int num_frames,
bool bnxt_xdp_attached(struct bnxt *bp, struct bnxt_rx_ring_info *rxr);
void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
- u16 cons, u8 **data_ptr, unsigned int *len,
+ u16 cons, u8 *data_ptr, unsigned int len,
struct xdp_buff *xdp);
void bnxt_xdp_buff_frags_free(struct bnxt_rx_ring_info *rxr,
struct xdp_buff *xdp);
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 59debdc344a5..58747292521d 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -11166,7 +11166,7 @@ static void tg3_reset_task(struct work_struct *work)
rtnl_lock();
tg3_full_lock(tp, 0);
- if (!netif_running(tp->dev)) {
+ if (tp->pcierr_recovery || !netif_running(tp->dev)) {
tg3_flag_clear(tp, RESET_TASK_PENDING);
tg3_full_unlock(tp);
rtnl_unlock();
@@ -18101,6 +18101,9 @@ static pci_ers_result_t tg3_io_error_detected(struct pci_dev *pdev,
netdev_info(netdev, "PCI I/O error detected\n");
+ /* Want to make sure that the reset task doesn't run */
+ tg3_reset_task_cancel(tp);
+
rtnl_lock();
/* Could be second call or maybe we don't have netdev yet */
@@ -18117,9 +18120,6 @@ static pci_ers_result_t tg3_io_error_detected(struct pci_dev *pdev,
tg3_timer_stop(tp);
- /* Want to make sure that the reset task doesn't run */
- tg3_reset_task_cancel(tp);
-
netif_device_detach(netdev);
/* Clean up software state, even if MMIO is blocked */
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 95667b979fab..6cda31520c42 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -2187,7 +2187,6 @@ static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev)
bool cloned = skb_cloned(*skb) || skb_header_cloned(*skb) ||
skb_is_nonlinear(*skb);
int padlen = ETH_ZLEN - (*skb)->len;
- int headroom = skb_headroom(*skb);
int tailroom = skb_tailroom(*skb);
struct sk_buff *nskb;
u32 fcs;
@@ -2201,9 +2200,6 @@ static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev)
/* FCS could be appeded to tailroom. */
if (tailroom >= ETH_FCS_LEN)
goto add_fcs;
- /* FCS could be appeded by moving data to headroom. */
- else if (!cloned && headroom + tailroom >= ETH_FCS_LEN)
- padlen = 0;
/* No room for FCS, need to reallocate skb. */
else
padlen = ETH_FCS_LEN;
@@ -2212,10 +2208,7 @@ static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev)
padlen += ETH_FCS_LEN;
}
- if (!cloned && headroom + tailroom >= padlen) {
- (*skb)->data = memmove((*skb)->head, (*skb)->data, (*skb)->len);
- skb_set_tail_pointer(*skb, (*skb)->len);
- } else {
+ if (cloned || tailroom < padlen) {
nskb = skb_copy_expand(*skb, 0, padlen, GFP_ATOMIC);
if (!nskb)
return -ENOMEM;
@@ -4634,25 +4627,26 @@ static int init_reset_optional(struct platform_device *pdev)
if (ret)
return dev_err_probe(&pdev->dev, ret,
"failed to init SGMII PHY\n");
- }
- ret = zynqmp_pm_is_function_supported(PM_IOCTL, IOCTL_SET_GEM_CONFIG);
- if (!ret) {
- u32 pm_info[2];
+ ret = zynqmp_pm_is_function_supported(PM_IOCTL, IOCTL_SET_GEM_CONFIG);
+ if (!ret) {
+ u32 pm_info[2];
+
+ ret = of_property_read_u32_array(pdev->dev.of_node, "power-domains",
+ pm_info, ARRAY_SIZE(pm_info));
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to read power management information\n");
+ goto err_out_phy_exit;
+ }
+ ret = zynqmp_pm_set_gem_config(pm_info[1], GEM_CONFIG_FIXED, 0);
+ if (ret)
+ goto err_out_phy_exit;
- ret = of_property_read_u32_array(pdev->dev.of_node, "power-domains",
- pm_info, ARRAY_SIZE(pm_info));
- if (ret) {
- dev_err(&pdev->dev, "Failed to read power management information\n");
- goto err_out_phy_exit;
+ ret = zynqmp_pm_set_gem_config(pm_info[1], GEM_CONFIG_SGMII_MODE, 1);
+ if (ret)
+ goto err_out_phy_exit;
}
- ret = zynqmp_pm_set_gem_config(pm_info[1], GEM_CONFIG_FIXED, 0);
- if (ret)
- goto err_out_phy_exit;
- ret = zynqmp_pm_set_gem_config(pm_info[1], GEM_CONFIG_SGMII_MODE, 1);
- if (ret)
- goto err_out_phy_exit;
}
/* Fully reset controller at hardware level if mapped in device tree */
diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c
index bf0190e1d2ea..00e2108f2ca4 100644
--- a/drivers/net/ethernet/engleder/tsnep_main.c
+++ b/drivers/net/ethernet/engleder/tsnep_main.c
@@ -450,7 +450,7 @@ static netdev_tx_t tsnep_xmit_frame_ring(struct sk_buff *skb,
/* ring full, shall not happen because queue is stopped if full
* below
*/
- netif_stop_queue(tx->adapter->netdev);
+ netif_stop_subqueue(tx->adapter->netdev, tx->queue_index);
spin_unlock_irqrestore(&tx->lock, flags);
@@ -493,7 +493,7 @@ static netdev_tx_t tsnep_xmit_frame_ring(struct sk_buff *skb,
if (tsnep_tx_desc_available(tx) < (MAX_SKB_FRAGS + 1)) {
/* ring can get full with next frame */
- netif_stop_queue(tx->adapter->netdev);
+ netif_stop_subqueue(tx->adapter->netdev, tx->queue_index);
}
spin_unlock_irqrestore(&tx->lock, flags);
@@ -503,11 +503,14 @@ static netdev_tx_t tsnep_xmit_frame_ring(struct sk_buff *skb,
static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
{
+ struct tsnep_tx_entry *entry;
+ struct netdev_queue *nq;
unsigned long flags;
int budget = 128;
- struct tsnep_tx_entry *entry;
- int count;
int length;
+ int count;
+
+ nq = netdev_get_tx_queue(tx->adapter->netdev, tx->queue_index);
spin_lock_irqsave(&tx->lock, flags);
@@ -564,8 +567,8 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
} while (likely(budget));
if ((tsnep_tx_desc_available(tx) >= ((MAX_SKB_FRAGS + 1) * 2)) &&
- netif_queue_stopped(tx->adapter->netdev)) {
- netif_wake_queue(tx->adapter->netdev);
+ netif_tx_queue_stopped(nq)) {
+ netif_tx_wake_queue(nq);
}
spin_unlock_irqrestore(&tx->lock, flags);
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index 3f8032947d86..027fff9f7db0 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -2410,6 +2410,9 @@ static int dpaa_eth_poll(struct napi_struct *napi, int budget)
cleaned = qman_p_poll_dqrr(np->p, budget);
+ if (np->xdp_act & XDP_REDIRECT)
+ xdp_do_flush();
+
if (cleaned < budget) {
napi_complete_done(napi, cleaned);
qman_p_irqsource_add(np->p, QM_PIRQ_DQRI);
@@ -2417,9 +2420,6 @@ static int dpaa_eth_poll(struct napi_struct *napi, int budget)
qman_p_irqsource_add(np->p, QM_PIRQ_DQRI);
}
- if (np->xdp_act & XDP_REDIRECT)
- xdp_do_flush();
-
return cleaned;
}
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 0c35abb7d065..2e79d18fc3c7 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -1993,10 +1993,15 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget)
if (rx_cleaned >= budget ||
txconf_cleaned >= DPAA2_ETH_TXCONF_PER_NAPI) {
work_done = budget;
+ if (ch->xdp.res & XDP_REDIRECT)
+ xdp_do_flush();
goto out;
}
} while (store_cleaned);
+ if (ch->xdp.res & XDP_REDIRECT)
+ xdp_do_flush();
+
/* Update NET DIM with the values for this CDAN */
dpaa2_io_update_net_dim(ch->dpio, ch->stats.frames_per_cdan,
ch->stats.bytes_per_cdan);
@@ -2032,9 +2037,7 @@ out:
txc_fq->dq_bytes = 0;
}
- if (ch->xdp.res & XDP_REDIRECT)
- xdp_do_flush_map();
- else if (rx_cleaned && ch->xdp.res & XDP_TX)
+ if (rx_cleaned && ch->xdp.res & XDP_TX)
dpaa2_eth_xdp_tx_flush(priv, ch, &priv->fq[flowid]);
return work_done;
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 3a79ead5219a..e96449eedfb5 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -2290,14 +2290,14 @@ static void enetc_tx_onestep_tstamp(struct work_struct *work)
priv = container_of(work, struct enetc_ndev_priv, tx_onestep_tstamp);
- netif_tx_lock(priv->ndev);
+ netif_tx_lock_bh(priv->ndev);
clear_bit_unlock(ENETC_TX_ONESTEP_TSTAMP_IN_PROGRESS, &priv->flags);
skb = skb_dequeue(&priv->tx_skbs);
if (skb)
enetc_start_xmit(skb, priv->ndev);
- netif_tx_unlock(priv->ndev);
+ netif_tx_unlock_bh(priv->ndev);
}
static void enetc_tx_onestep_tstamp_init(struct enetc_ndev_priv *priv)
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ierb.c b/drivers/net/ethernet/freescale/enetc/enetc_ierb.c
index 91f02c505028..b307bef4dc29 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_ierb.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_ierb.c
@@ -127,11 +127,6 @@ static int enetc_ierb_probe(struct platform_device *pdev)
return 0;
}
-static int enetc_ierb_remove(struct platform_device *pdev)
-{
- return 0;
-}
-
static const struct of_device_id enetc_ierb_match[] = {
{ .compatible = "fsl,ls1028a-enetc-ierb", },
{},
@@ -144,7 +139,6 @@ static struct platform_driver enetc_ierb_driver = {
.of_match_table = enetc_ierb_match,
},
.probe = enetc_ierb_probe,
- .remove = enetc_ierb_remove,
};
module_platform_driver(enetc_ierb_driver);
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 644f3c963730..2341597408d1 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -3191,7 +3191,7 @@ static void fec_enet_free_buffers(struct net_device *ndev)
for (q = 0; q < fep->num_rx_queues; q++) {
rxq = fep->rx_queue[q];
for (i = 0; i < rxq->bd.ring_size; i++)
- page_pool_release_page(rxq->page_pool, rxq->rx_skb_info[i].page);
+ page_pool_put_full_page(rxq->page_pool, rxq->rx_skb_info[i].page, false);
for (i = 0; i < XDP_STATS_TOTAL; i++)
rxq->stats[i] = 0;
diff --git a/drivers/net/ethernet/freescale/fman/fman_dtsec.c b/drivers/net/ethernet/freescale/fman/fman_dtsec.c
index d00bae15a901..d528ca681b6f 100644
--- a/drivers/net/ethernet/freescale/fman/fman_dtsec.c
+++ b/drivers/net/ethernet/freescale/fman/fman_dtsec.c
@@ -1430,7 +1430,7 @@ int dtsec_initialization(struct mac_device *mac_dev,
dtsec->dtsec_drv_param->tx_pad_crc = true;
phy_node = of_parse_phandle(mac_node, "tbi-handle", 0);
- if (!phy_node || of_device_is_available(phy_node)) {
+ if (!phy_node || !of_device_is_available(phy_node)) {
of_node_put(phy_node);
err = -EINVAL;
dev_err_probe(mac_dev->dev, err,
diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c
index 9349f841bd06..587ad81a2dc3 100644
--- a/drivers/net/ethernet/freescale/fman/fman_memac.c
+++ b/drivers/net/ethernet/freescale/fman/fman_memac.c
@@ -1055,6 +1055,9 @@ static struct phylink_pcs *memac_pcs_create(struct device_node *mac_node,
return ERR_PTR(-EPROBE_DEFER);
pcs = lynx_pcs_create(mdiodev);
+ if (!pcs)
+ mdio_device_free(mdiodev);
+
return pcs;
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 0ec5730b1788..b4c4fb873568 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -3855,18 +3855,16 @@ static int hns3_gro_complete(struct sk_buff *skb, u32 l234info)
return 0;
}
-static bool hns3_checksum_complete(struct hns3_enet_ring *ring,
+static void hns3_checksum_complete(struct hns3_enet_ring *ring,
struct sk_buff *skb, u32 ptype, u16 csum)
{
if (ptype == HNS3_INVALID_PTYPE ||
hns3_rx_ptype_tbl[ptype].ip_summed != CHECKSUM_COMPLETE)
- return false;
+ return;
hns3_ring_stats_update(ring, csum_complete);
skb->ip_summed = CHECKSUM_COMPLETE;
skb->csum = csum_unfold((__force __sum16)csum);
-
- return true;
}
static void hns3_rx_handle_csum(struct sk_buff *skb, u32 l234info,
@@ -3926,8 +3924,7 @@ static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb,
ptype = hnae3_get_field(ol_info, HNS3_RXD_PTYPE_M,
HNS3_RXD_PTYPE_S);
- if (hns3_checksum_complete(ring, skb, ptype, csum))
- return;
+ hns3_checksum_complete(ring, skb, ptype, csum);
/* check if hardware has done checksum */
if (!(bd_base_info & BIT(HNS3_RXD_L3L4P_B)))
@@ -3936,6 +3933,7 @@ static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb,
if (unlikely(l234info & (BIT(HNS3_RXD_L3E_B) | BIT(HNS3_RXD_L4E_B) |
BIT(HNS3_RXD_OL3E_B) |
BIT(HNS3_RXD_OL4E_B)))) {
+ skb->ip_summed = CHECKSUM_NONE;
hns3_ring_stats_update(ring, l3l4_csum_err);
return;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 4e54f91f7a6c..07ad5f35219e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -3910,9 +3910,17 @@ static int hclge_set_all_vf_rst(struct hclge_dev *hdev, bool reset)
return ret;
}
- if (!reset || !test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state))
+ if (!reset ||
+ !test_bit(HCLGE_VPORT_STATE_INITED, &vport->state))
continue;
+ if (!test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state) &&
+ hdev->reset_type == HNAE3_FUNC_RESET) {
+ set_bit(HCLGE_VPORT_NEED_NOTIFY_RESET,
+ &vport->need_notify);
+ continue;
+ }
+
/* Inform VF to process the reset.
* hclge_inform_reset_assert_to_vf may fail if VF
* driver is not loaded.
@@ -4609,18 +4617,25 @@ static void hclge_reset_service_task(struct hclge_dev *hdev)
static void hclge_update_vport_alive(struct hclge_dev *hdev)
{
+#define HCLGE_ALIVE_SECONDS_NORMAL 8
+
+ unsigned long alive_time = HCLGE_ALIVE_SECONDS_NORMAL * HZ;
int i;
/* start from vport 1 for PF is always alive */
for (i = 1; i < hdev->num_alloc_vport; i++) {
struct hclge_vport *vport = &hdev->vport[i];
- if (time_after(jiffies, vport->last_active_jiffies + 8 * HZ))
+ if (!test_bit(HCLGE_VPORT_STATE_INITED, &vport->state) ||
+ !test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state))
+ continue;
+ if (time_after(jiffies, vport->last_active_jiffies +
+ alive_time)) {
clear_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state);
-
- /* If vf is not alive, set to default value */
- if (!test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state))
- vport->mps = HCLGE_MAC_DEFAULT_FRAME;
+ dev_warn(&hdev->pdev->dev,
+ "VF %u heartbeat timeout\n",
+ i - HCLGE_VF_VPORT_START_NUM);
+ }
}
}
@@ -8064,9 +8079,11 @@ int hclge_vport_start(struct hclge_vport *vport)
{
struct hclge_dev *hdev = vport->back;
+ set_bit(HCLGE_VPORT_STATE_INITED, &vport->state);
set_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state);
set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, &vport->state);
vport->last_active_jiffies = jiffies;
+ vport->need_notify = 0;
if (test_bit(vport->vport_id, hdev->vport_config_block)) {
if (vport->vport_id) {
@@ -8084,7 +8101,9 @@ int hclge_vport_start(struct hclge_vport *vport)
void hclge_vport_stop(struct hclge_vport *vport)
{
+ clear_bit(HCLGE_VPORT_STATE_INITED, &vport->state);
clear_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state);
+ vport->need_notify = 0;
}
static int hclge_client_start(struct hnae3_handle *handle)
@@ -9208,7 +9227,8 @@ static int hclge_set_vf_mac(struct hnae3_handle *handle, int vf,
return 0;
}
- dev_info(&hdev->pdev->dev, "MAC of VF %d has been set to %s\n",
+ dev_info(&hdev->pdev->dev,
+ "MAC of VF %d has been set to %s, will be active after VF reset\n",
vf, format_mac_addr);
return 0;
}
@@ -10465,12 +10485,16 @@ static int hclge_set_vf_vlan_filter(struct hnae3_handle *handle, int vfid,
* for DEVICE_VERSION_V3, vf doesn't need to know about the port based
* VLAN state.
*/
- if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V3 &&
- test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state))
- (void)hclge_push_vf_port_base_vlan_info(&hdev->vport[0],
- vport->vport_id,
- state, &vlan_info);
-
+ if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V3) {
+ if (test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state))
+ (void)hclge_push_vf_port_base_vlan_info(&hdev->vport[0],
+ vport->vport_id,
+ state,
+ &vlan_info);
+ else
+ set_bit(HCLGE_VPORT_NEED_NOTIFY_VF_VLAN,
+ &vport->need_notify);
+ }
return 0;
}
@@ -11941,7 +11965,7 @@ static void hclge_reset_vport_state(struct hclge_dev *hdev)
int i;
for (i = 0; i < hdev->num_alloc_vport; i++) {
- hclge_vport_stop(vport);
+ clear_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state);
vport++;
}
}
@@ -12754,60 +12778,71 @@ static int hclge_gro_en(struct hnae3_handle *handle, bool enable)
return ret;
}
-static void hclge_sync_promisc_mode(struct hclge_dev *hdev)
+static int hclge_sync_vport_promisc_mode(struct hclge_vport *vport)
{
- struct hclge_vport *vport = &hdev->vport[0];
struct hnae3_handle *handle = &vport->nic;
+ struct hclge_dev *hdev = vport->back;
+ bool uc_en = false;
+ bool mc_en = false;
u8 tmp_flags;
+ bool bc_en;
int ret;
- u16 i;
if (vport->last_promisc_flags != vport->overflow_promisc_flags) {
set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, &vport->state);
vport->last_promisc_flags = vport->overflow_promisc_flags;
}
- if (test_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, &vport->state)) {
+ if (!test_and_clear_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE,
+ &vport->state))
+ return 0;
+
+ /* for PF */
+ if (!vport->vport_id) {
tmp_flags = handle->netdev_flags | vport->last_promisc_flags;
ret = hclge_set_promisc_mode(handle, tmp_flags & HNAE3_UPE,
tmp_flags & HNAE3_MPE);
- if (!ret) {
- clear_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE,
- &vport->state);
+ if (!ret)
set_bit(HCLGE_VPORT_STATE_VLAN_FLTR_CHANGE,
&vport->state);
- }
+ else
+ set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE,
+ &vport->state);
+ return ret;
}
- for (i = 1; i < hdev->num_alloc_vport; i++) {
- bool uc_en = false;
- bool mc_en = false;
- bool bc_en;
+ /* for VF */
+ if (vport->vf_info.trusted) {
+ uc_en = vport->vf_info.request_uc_en > 0 ||
+ vport->overflow_promisc_flags & HNAE3_OVERFLOW_UPE;
+ mc_en = vport->vf_info.request_mc_en > 0 ||
+ vport->overflow_promisc_flags & HNAE3_OVERFLOW_MPE;
+ }
+ bc_en = vport->vf_info.request_bc_en > 0;
- vport = &hdev->vport[i];
+ ret = hclge_cmd_set_promisc_mode(hdev, vport->vport_id, uc_en,
+ mc_en, bc_en);
+ if (ret) {
+ set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, &vport->state);
+ return ret;
+ }
+ hclge_set_vport_vlan_fltr_change(vport);
- if (!test_and_clear_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE,
- &vport->state))
- continue;
+ return 0;
+}
- if (vport->vf_info.trusted) {
- uc_en = vport->vf_info.request_uc_en > 0 ||
- vport->overflow_promisc_flags &
- HNAE3_OVERFLOW_UPE;
- mc_en = vport->vf_info.request_mc_en > 0 ||
- vport->overflow_promisc_flags &
- HNAE3_OVERFLOW_MPE;
- }
- bc_en = vport->vf_info.request_bc_en > 0;
+static void hclge_sync_promisc_mode(struct hclge_dev *hdev)
+{
+ struct hclge_vport *vport;
+ int ret;
+ u16 i;
- ret = hclge_cmd_set_promisc_mode(hdev, vport->vport_id, uc_en,
- mc_en, bc_en);
- if (ret) {
- set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE,
- &vport->state);
+ for (i = 0; i < hdev->num_alloc_vport; i++) {
+ vport = &hdev->vport[i];
+
+ ret = hclge_sync_vport_promisc_mode(vport);
+ if (ret)
return;
- }
- hclge_set_vport_vlan_fltr_change(vport);
}
}
@@ -12944,6 +12979,11 @@ static void hclge_clear_vport_vf_info(struct hclge_vport *vport, int vfid)
struct hclge_vlan_info vlan_info;
int ret;
+ clear_bit(HCLGE_VPORT_STATE_INITED, &vport->state);
+ clear_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state);
+ vport->need_notify = 0;
+ vport->mps = 0;
+
/* after disable sriov, clean VF rate configured by PF */
ret = hclge_tm_qs_shaper_cfg(vport, 0);
if (ret)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 495b639b0dc2..13f23d606e77 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -995,9 +995,15 @@ enum HCLGE_VPORT_STATE {
HCLGE_VPORT_STATE_MAC_TBL_CHANGE,
HCLGE_VPORT_STATE_PROMISC_CHANGE,
HCLGE_VPORT_STATE_VLAN_FLTR_CHANGE,
+ HCLGE_VPORT_STATE_INITED,
HCLGE_VPORT_STATE_MAX
};
+enum HCLGE_VPORT_NEED_NOTIFY {
+ HCLGE_VPORT_NEED_NOTIFY_RESET,
+ HCLGE_VPORT_NEED_NOTIFY_VF_VLAN,
+};
+
struct hclge_vlan_info {
u16 vlan_proto; /* so far support 802.1Q only */
u16 qos;
@@ -1044,6 +1050,7 @@ struct hclge_vport {
struct hnae3_handle roce;
unsigned long state;
+ unsigned long need_notify;
unsigned long last_active_jiffies;
u32 mps; /* Max packet size */
struct hclge_vf_info vf_info;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index a7b06c63143c..04ff9bf12185 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -124,17 +124,26 @@ static int hclge_send_mbx_msg(struct hclge_vport *vport, u8 *msg, u16 msg_len,
return status;
}
+static int hclge_inform_vf_reset(struct hclge_vport *vport, u16 reset_type)
+{
+ __le16 msg_data;
+ u8 dest_vfid;
+
+ dest_vfid = (u8)vport->vport_id;
+ msg_data = cpu_to_le16(reset_type);
+
+ /* send this requested info to VF */
+ return hclge_send_mbx_msg(vport, (u8 *)&msg_data, sizeof(msg_data),
+ HCLGE_MBX_ASSERTING_RESET, dest_vfid);
+}
+
int hclge_inform_reset_assert_to_vf(struct hclge_vport *vport)
{
struct hclge_dev *hdev = vport->back;
- __le16 msg_data;
u16 reset_type;
- u8 dest_vfid;
BUILD_BUG_ON(HNAE3_MAX_RESET > U16_MAX);
- dest_vfid = (u8)vport->vport_id;
-
if (hdev->reset_type == HNAE3_FUNC_RESET)
reset_type = HNAE3_VF_PF_FUNC_RESET;
else if (hdev->reset_type == HNAE3_FLR_RESET)
@@ -142,11 +151,7 @@ int hclge_inform_reset_assert_to_vf(struct hclge_vport *vport)
else
reset_type = HNAE3_VF_FUNC_RESET;
- msg_data = cpu_to_le16(reset_type);
-
- /* send this requested info to VF */
- return hclge_send_mbx_msg(vport, (u8 *)&msg_data, sizeof(msg_data),
- HCLGE_MBX_ASSERTING_RESET, dest_vfid);
+ return hclge_inform_vf_reset(vport, reset_type);
}
static void hclge_free_vector_ring_chain(struct hnae3_ring_chain_node *head)
@@ -652,9 +657,56 @@ static int hclge_reset_vf(struct hclge_vport *vport)
return hclge_func_reset_cmd(hdev, vport->vport_id);
}
+static void hclge_notify_vf_config(struct hclge_vport *vport)
+{
+ struct hclge_dev *hdev = vport->back;
+ struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
+ struct hclge_port_base_vlan_config *vlan_cfg;
+ int ret;
+
+ hclge_push_vf_link_status(vport);
+ if (test_bit(HCLGE_VPORT_NEED_NOTIFY_RESET, &vport->need_notify)) {
+ ret = hclge_inform_vf_reset(vport, HNAE3_VF_PF_FUNC_RESET);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "failed to inform VF %u reset!",
+ vport->vport_id - HCLGE_VF_VPORT_START_NUM);
+ return;
+ }
+ vport->need_notify = 0;
+ return;
+ }
+
+ if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V3 &&
+ test_bit(HCLGE_VPORT_NEED_NOTIFY_VF_VLAN, &vport->need_notify)) {
+ vlan_cfg = &vport->port_base_vlan_cfg;
+ ret = hclge_push_vf_port_base_vlan_info(&hdev->vport[0],
+ vport->vport_id,
+ vlan_cfg->state,
+ &vlan_cfg->vlan_info);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "failed to inform VF %u port base vlan!",
+ vport->vport_id - HCLGE_VF_VPORT_START_NUM);
+ return;
+ }
+ clear_bit(HCLGE_VPORT_NEED_NOTIFY_VF_VLAN, &vport->need_notify);
+ }
+}
+
static void hclge_vf_keep_alive(struct hclge_vport *vport)
{
+ struct hclge_dev *hdev = vport->back;
+
vport->last_active_jiffies = jiffies;
+
+ if (test_bit(HCLGE_VPORT_STATE_INITED, &vport->state) &&
+ !test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) {
+ set_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state);
+ dev_info(&hdev->pdev->dev, "VF %u is alive!",
+ vport->vport_id - HCLGE_VF_VPORT_START_NUM);
+ hclge_notify_vf_config(vport);
+ }
}
static int hclge_set_vf_mtu(struct hclge_vport *vport,
@@ -954,6 +1006,7 @@ static int hclge_mbx_vf_uninit_handler(struct hclge_mbx_ops_param *param)
hclge_rm_vport_all_mac_table(param->vport, true,
HCLGE_MAC_ADDR_MC);
hclge_rm_vport_all_vlan_table(param->vport, true);
+ param->vport->mps = 0;
return 0;
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index db6f7cdba958..e84e5be8e59e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -2767,7 +2767,8 @@ static int hclgevf_pci_reset(struct hclgevf_dev *hdev)
struct pci_dev *pdev = hdev->pdev;
int ret = 0;
- if (hdev->reset_type == HNAE3_VF_FULL_RESET &&
+ if ((hdev->reset_type == HNAE3_VF_FULL_RESET ||
+ hdev->reset_type == HNAE3_FLR_RESET) &&
test_bit(HCLGEVF_STATE_IRQ_INITED, &hdev->state)) {
hclgevf_misc_irq_uninit(hdev);
hclgevf_uninit_msi(hdev);
@@ -3129,7 +3130,7 @@ static int hclgevf_set_channels(struct hnae3_handle *handle, u32 new_tqps_num,
hclgevf_update_rss_size(handle, new_tqps_num);
- hclge_comm_get_rss_tc_info(cur_rss_size, hdev->hw_tc_map,
+ hclge_comm_get_rss_tc_info(kinfo->rss_size, hdev->hw_tc_map,
tc_offset, tc_valid, tc_size);
ret = hclge_comm_set_rss_tc_mode(&hdev->hw.hw, tc_offset,
tc_valid, tc_size);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 53d0083e35da..52eec0a50492 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -2921,7 +2921,7 @@ static int i40e_change_mtu(struct net_device *netdev, int new_mtu)
struct i40e_pf *pf = vsi->back;
if (i40e_enabled_xdp_vsi(vsi)) {
- int frame_size = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+ int frame_size = new_mtu + I40E_PACKET_HDR_PAD;
if (frame_size > i40e_max_xdp_frame_size(vsi))
return -EINVAL;
@@ -13167,6 +13167,8 @@ static int i40e_ndo_bridge_setlink(struct net_device *dev,
}
br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
+ if (!br_spec)
+ return -EINVAL;
nla_for_each_nested(attr, br_spec, rem) {
__u16 mode;
diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
index 0d1bab4ac1b0..2a9f1eeeb701 100644
--- a/drivers/net/ethernet/intel/iavf/iavf.h
+++ b/drivers/net/ethernet/intel/iavf/iavf.h
@@ -249,6 +249,7 @@ struct iavf_cloud_filter {
/* board specific private data structure */
struct iavf_adapter {
+ struct workqueue_struct *wq;
struct work_struct reset_task;
struct work_struct adminq_task;
struct delayed_work client_task;
@@ -459,7 +460,6 @@ struct iavf_device {
/* needed by iavf_ethtool.c */
extern char iavf_driver_name[];
-extern struct workqueue_struct *iavf_wq;
static inline const char *iavf_state_str(enum iavf_state_t state)
{
diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
index d79ead5e8d0c..6f171d1d85b7 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
@@ -532,7 +532,7 @@ static int iavf_set_priv_flags(struct net_device *netdev, u32 flags)
if (changed_flags & IAVF_FLAG_LEGACY_RX) {
if (netif_running(netdev)) {
adapter->flags |= IAVF_FLAG_RESET_NEEDED;
- queue_work(iavf_wq, &adapter->reset_task);
+ queue_work(adapter->wq, &adapter->reset_task);
}
}
@@ -672,7 +672,7 @@ static int iavf_set_ringparam(struct net_device *netdev,
if (netif_running(netdev)) {
adapter->flags |= IAVF_FLAG_RESET_NEEDED;
- queue_work(iavf_wq, &adapter->reset_task);
+ queue_work(adapter->wq, &adapter->reset_task);
}
return 0;
@@ -1433,7 +1433,7 @@ static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx
adapter->aq_required |= IAVF_FLAG_AQ_ADD_FDIR_FILTER;
spin_unlock_bh(&adapter->fdir_fltr_lock);
- mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0);
+ mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0);
ret:
if (err && fltr)
@@ -1474,7 +1474,7 @@ static int iavf_del_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx
spin_unlock_bh(&adapter->fdir_fltr_lock);
if (fltr && fltr->state == IAVF_FDIR_FLTR_DEL_REQUEST)
- mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0);
+ mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0);
return err;
}
@@ -1658,7 +1658,7 @@ iavf_set_adv_rss_hash_opt(struct iavf_adapter *adapter,
spin_unlock_bh(&adapter->adv_rss_lock);
if (!err)
- mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0);
+ mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0);
mutex_unlock(&adapter->crit_lock);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index c4e451ef7942..4b09785d2147 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -49,7 +49,6 @@ MODULE_DESCRIPTION("Intel(R) Ethernet Adaptive Virtual Function Network Driver")
MODULE_LICENSE("GPL v2");
static const struct net_device_ops iavf_netdev_ops;
-struct workqueue_struct *iavf_wq;
int iavf_status_to_errno(enum iavf_status status)
{
@@ -277,7 +276,7 @@ void iavf_schedule_reset(struct iavf_adapter *adapter)
if (!(adapter->flags &
(IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED))) {
adapter->flags |= IAVF_FLAG_RESET_NEEDED;
- queue_work(iavf_wq, &adapter->reset_task);
+ queue_work(adapter->wq, &adapter->reset_task);
}
}
@@ -291,7 +290,7 @@ void iavf_schedule_reset(struct iavf_adapter *adapter)
void iavf_schedule_request_stats(struct iavf_adapter *adapter)
{
adapter->aq_required |= IAVF_FLAG_AQ_REQUEST_STATS;
- mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0);
+ mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0);
}
/**
@@ -411,7 +410,7 @@ static irqreturn_t iavf_msix_aq(int irq, void *data)
if (adapter->state != __IAVF_REMOVE)
/* schedule work on the private workqueue */
- queue_work(iavf_wq, &adapter->adminq_task);
+ queue_work(adapter->wq, &adapter->adminq_task);
return IRQ_HANDLED;
}
@@ -1034,7 +1033,7 @@ int iavf_replace_primary_mac(struct iavf_adapter *adapter,
/* schedule the watchdog task to immediately process the request */
if (f) {
- queue_work(iavf_wq, &adapter->watchdog_task.work);
+ mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0);
return 0;
}
return -ENOMEM;
@@ -1257,7 +1256,7 @@ static void iavf_up_complete(struct iavf_adapter *adapter)
adapter->aq_required |= IAVF_FLAG_AQ_ENABLE_QUEUES;
if (CLIENT_ENABLED(adapter))
adapter->flags |= IAVF_FLAG_CLIENT_NEEDS_OPEN;
- mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0);
+ mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0);
}
/**
@@ -1414,7 +1413,7 @@ void iavf_down(struct iavf_adapter *adapter)
adapter->aq_required |= IAVF_FLAG_AQ_DISABLE_QUEUES;
}
- mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0);
+ mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0);
}
/**
@@ -2248,7 +2247,7 @@ iavf_set_vlan_offload_features(struct iavf_adapter *adapter,
if (aq_required) {
adapter->aq_required |= aq_required;
- mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0);
+ mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0);
}
}
@@ -2693,6 +2692,15 @@ static void iavf_watchdog_task(struct work_struct *work)
goto restart_watchdog;
}
+ if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES) &&
+ adapter->netdev_registered &&
+ !test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section) &&
+ rtnl_trylock()) {
+ netdev_update_features(adapter->netdev);
+ rtnl_unlock();
+ adapter->flags &= ~IAVF_FLAG_SETUP_NETDEV_FEATURES;
+ }
+
if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
iavf_change_state(adapter, __IAVF_COMM_FAILED);
@@ -2700,7 +2708,7 @@ static void iavf_watchdog_task(struct work_struct *work)
adapter->aq_required = 0;
adapter->current_op = VIRTCHNL_OP_UNKNOWN;
mutex_unlock(&adapter->crit_lock);
- queue_work(iavf_wq, &adapter->reset_task);
+ queue_work(adapter->wq, &adapter->reset_task);
return;
}
@@ -2708,31 +2716,31 @@ static void iavf_watchdog_task(struct work_struct *work)
case __IAVF_STARTUP:
iavf_startup(adapter);
mutex_unlock(&adapter->crit_lock);
- queue_delayed_work(iavf_wq, &adapter->watchdog_task,
+ queue_delayed_work(adapter->wq, &adapter->watchdog_task,
msecs_to_jiffies(30));
return;
case __IAVF_INIT_VERSION_CHECK:
iavf_init_version_check(adapter);
mutex_unlock(&adapter->crit_lock);
- queue_delayed_work(iavf_wq, &adapter->watchdog_task,
+ queue_delayed_work(adapter->wq, &adapter->watchdog_task,
msecs_to_jiffies(30));
return;
case __IAVF_INIT_GET_RESOURCES:
iavf_init_get_resources(adapter);
mutex_unlock(&adapter->crit_lock);
- queue_delayed_work(iavf_wq, &adapter->watchdog_task,
+ queue_delayed_work(adapter->wq, &adapter->watchdog_task,
msecs_to_jiffies(1));
return;
case __IAVF_INIT_EXTENDED_CAPS:
iavf_init_process_extended_caps(adapter);
mutex_unlock(&adapter->crit_lock);
- queue_delayed_work(iavf_wq, &adapter->watchdog_task,
+ queue_delayed_work(adapter->wq, &adapter->watchdog_task,
msecs_to_jiffies(1));
return;
case __IAVF_INIT_CONFIG_ADAPTER:
iavf_init_config_adapter(adapter);
mutex_unlock(&adapter->crit_lock);
- queue_delayed_work(iavf_wq, &adapter->watchdog_task,
+ queue_delayed_work(adapter->wq, &adapter->watchdog_task,
msecs_to_jiffies(1));
return;
case __IAVF_INIT_FAILED:
@@ -2751,14 +2759,14 @@ static void iavf_watchdog_task(struct work_struct *work)
adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED;
iavf_shutdown_adminq(hw);
mutex_unlock(&adapter->crit_lock);
- queue_delayed_work(iavf_wq,
+ queue_delayed_work(adapter->wq,
&adapter->watchdog_task, (5 * HZ));
return;
}
/* Try again from failed step*/
iavf_change_state(adapter, adapter->last_state);
mutex_unlock(&adapter->crit_lock);
- queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ);
+ queue_delayed_work(adapter->wq, &adapter->watchdog_task, HZ);
return;
case __IAVF_COMM_FAILED:
if (test_bit(__IAVF_IN_REMOVE_TASK,
@@ -2789,13 +2797,14 @@ static void iavf_watchdog_task(struct work_struct *work)
adapter->aq_required = 0;
adapter->current_op = VIRTCHNL_OP_UNKNOWN;
mutex_unlock(&adapter->crit_lock);
- queue_delayed_work(iavf_wq,
+ queue_delayed_work(adapter->wq,
&adapter->watchdog_task,
msecs_to_jiffies(10));
return;
case __IAVF_RESETTING:
mutex_unlock(&adapter->crit_lock);
- queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ * 2);
+ queue_delayed_work(adapter->wq, &adapter->watchdog_task,
+ HZ * 2);
return;
case __IAVF_DOWN:
case __IAVF_DOWN_PENDING:
@@ -2834,9 +2843,9 @@ static void iavf_watchdog_task(struct work_struct *work)
adapter->aq_required = 0;
adapter->current_op = VIRTCHNL_OP_UNKNOWN;
dev_err(&adapter->pdev->dev, "Hardware reset detected\n");
- queue_work(iavf_wq, &adapter->reset_task);
+ queue_work(adapter->wq, &adapter->reset_task);
mutex_unlock(&adapter->crit_lock);
- queue_delayed_work(iavf_wq,
+ queue_delayed_work(adapter->wq,
&adapter->watchdog_task, HZ * 2);
return;
}
@@ -2845,12 +2854,13 @@ static void iavf_watchdog_task(struct work_struct *work)
mutex_unlock(&adapter->crit_lock);
restart_watchdog:
if (adapter->state >= __IAVF_DOWN)
- queue_work(iavf_wq, &adapter->adminq_task);
+ queue_work(adapter->wq, &adapter->adminq_task);
if (adapter->aq_required)
- queue_delayed_work(iavf_wq, &adapter->watchdog_task,
+ queue_delayed_work(adapter->wq, &adapter->watchdog_task,
msecs_to_jiffies(20));
else
- queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ * 2);
+ queue_delayed_work(adapter->wq, &adapter->watchdog_task,
+ HZ * 2);
}
/**
@@ -2952,7 +2962,7 @@ static void iavf_reset_task(struct work_struct *work)
*/
if (!mutex_trylock(&adapter->crit_lock)) {
if (adapter->state != __IAVF_REMOVE)
- queue_work(iavf_wq, &adapter->reset_task);
+ queue_work(adapter->wq, &adapter->reset_task);
goto reset_finish;
}
@@ -3116,7 +3126,7 @@ continue_reset:
bitmap_clear(adapter->vsi.active_cvlans, 0, VLAN_N_VID);
bitmap_clear(adapter->vsi.active_svlans, 0, VLAN_N_VID);
- mod_delayed_work(iavf_wq, &adapter->watchdog_task, 2);
+ mod_delayed_work(adapter->wq, &adapter->watchdog_task, 2);
/* We were running when the reset started, so we need to restore some
* state here.
@@ -3208,7 +3218,7 @@ static void iavf_adminq_task(struct work_struct *work)
if (adapter->state == __IAVF_REMOVE)
return;
- queue_work(iavf_wq, &adapter->adminq_task);
+ queue_work(adapter->wq, &adapter->adminq_task);
goto out;
}
@@ -3232,24 +3242,6 @@ static void iavf_adminq_task(struct work_struct *work)
} while (pending);
mutex_unlock(&adapter->crit_lock);
- if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES)) {
- if (adapter->netdev_registered ||
- !test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) {
- struct net_device *netdev = adapter->netdev;
-
- rtnl_lock();
- netdev_update_features(netdev);
- rtnl_unlock();
- /* Request VLAN offload settings */
- if (VLAN_V2_ALLOWED(adapter))
- iavf_set_vlan_offload_features
- (adapter, 0, netdev->features);
-
- iavf_set_queue_vlan_tag_loc(adapter);
- }
-
- adapter->flags &= ~IAVF_FLAG_SETUP_NETDEV_FEATURES;
- }
if ((adapter->flags &
(IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED)) ||
adapter->state == __IAVF_RESETTING)
@@ -3850,7 +3842,7 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter,
field_flags |= IAVF_CLOUD_FIELD_IIP;
} else {
dev_err(&adapter->pdev->dev, "Bad ip src mask 0x%08x\n",
- be32_to_cpu(match.mask->dst));
+ be32_to_cpu(match.mask->src));
return -EINVAL;
}
}
@@ -4349,7 +4341,7 @@ static int iavf_change_mtu(struct net_device *netdev, int new_mtu)
if (netif_running(netdev)) {
adapter->flags |= IAVF_FLAG_RESET_NEEDED;
- queue_work(iavf_wq, &adapter->reset_task);
+ queue_work(adapter->wq, &adapter->reset_task);
}
return 0;
@@ -4898,6 +4890,13 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
hw = &adapter->hw;
hw->back = adapter;
+ adapter->wq = alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM,
+ iavf_driver_name);
+ if (!adapter->wq) {
+ err = -ENOMEM;
+ goto err_alloc_wq;
+ }
+
adapter->msg_enable = BIT(DEFAULT_DEBUG_LEVEL_SHIFT) - 1;
iavf_change_state(adapter, __IAVF_STARTUP);
@@ -4942,7 +4941,7 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
INIT_WORK(&adapter->adminq_task, iavf_adminq_task);
INIT_DELAYED_WORK(&adapter->watchdog_task, iavf_watchdog_task);
INIT_DELAYED_WORK(&adapter->client_task, iavf_client_task);
- queue_delayed_work(iavf_wq, &adapter->watchdog_task,
+ queue_delayed_work(adapter->wq, &adapter->watchdog_task,
msecs_to_jiffies(5 * (pdev->devfn & 0x07)));
/* Setup the wait queue for indicating transition to down status */
@@ -4954,6 +4953,8 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
return 0;
err_ioremap:
+ destroy_workqueue(adapter->wq);
+err_alloc_wq:
free_netdev(netdev);
err_alloc_etherdev:
pci_disable_pcie_error_reporting(pdev);
@@ -5023,7 +5024,7 @@ static int __maybe_unused iavf_resume(struct device *dev_d)
return err;
}
- queue_work(iavf_wq, &adapter->reset_task);
+ queue_work(adapter->wq, &adapter->reset_task);
netif_device_attach(adapter->netdev);
@@ -5170,6 +5171,8 @@ static void iavf_remove(struct pci_dev *pdev)
}
spin_unlock_bh(&adapter->adv_rss_lock);
+ destroy_workqueue(adapter->wq);
+
free_netdev(netdev);
pci_disable_pcie_error_reporting(pdev);
@@ -5196,24 +5199,11 @@ static struct pci_driver iavf_driver = {
**/
static int __init iavf_init_module(void)
{
- int ret;
-
pr_info("iavf: %s\n", iavf_driver_string);
pr_info("%s\n", iavf_copyright);
- iavf_wq = alloc_workqueue("%s", WQ_UNBOUND | WQ_MEM_RECLAIM, 1,
- iavf_driver_name);
- if (!iavf_wq) {
- pr_err("%s: Failed to create workqueue\n", iavf_driver_name);
- return -ENOMEM;
- }
-
- ret = pci_register_driver(&iavf_driver);
- if (ret)
- destroy_workqueue(iavf_wq);
-
- return ret;
+ return pci_register_driver(&iavf_driver);
}
module_init(iavf_init_module);
@@ -5227,7 +5217,6 @@ module_init(iavf_init_module);
static void __exit iavf_exit_module(void)
{
pci_unregister_driver(&iavf_driver);
- destroy_workqueue(iavf_wq);
}
module_exit(iavf_exit_module);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
index 24a701fd140e..365ca0c710c4 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
@@ -1952,7 +1952,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
if (!(adapter->flags & IAVF_FLAG_RESET_PENDING)) {
adapter->flags |= IAVF_FLAG_RESET_PENDING;
dev_info(&adapter->pdev->dev, "Scheduling reset task\n");
- queue_work(iavf_wq, &adapter->reset_task);
+ queue_work(adapter->wq, &adapter->reset_task);
}
break;
default:
@@ -2226,6 +2226,14 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
iavf_process_config(adapter);
adapter->flags |= IAVF_FLAG_SETUP_NETDEV_FEATURES;
+
+ /* Request VLAN offload settings */
+ if (VLAN_V2_ALLOWED(adapter))
+ iavf_set_vlan_offload_features(adapter, 0,
+ netdev->features);
+
+ iavf_set_queue_vlan_tag_loc(adapter);
+
was_mac_changed = !ether_addr_equal(netdev->dev_addr,
adapter->hw.mac.addr);
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 2f0b604abc5e..713069f809ec 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -880,7 +880,7 @@ void ice_set_ethtool_repr_ops(struct net_device *netdev);
void ice_set_ethtool_safe_mode_ops(struct net_device *netdev);
u16 ice_get_avail_txq_count(struct ice_pf *pf);
u16 ice_get_avail_rxq_count(struct ice_pf *pf);
-int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx);
+int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx, bool locked);
void ice_update_vsi_stats(struct ice_vsi *vsi);
void ice_update_pf_stats(struct ice_pf *pf);
void
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index d02b55b6aa9c..3e08847505ce 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -5524,7 +5524,7 @@ bool ice_fw_supports_report_dflt_cfg(struct ice_hw *hw)
* returned by the firmware is a 16 bit * value, but is indexed
* by [fls(speed) - 1]
*/
-static const u32 ice_aq_to_link_speed[15] = {
+static const u32 ice_aq_to_link_speed[] = {
SPEED_10, /* BIT(0) */
SPEED_100,
SPEED_1000,
@@ -5536,10 +5536,6 @@ static const u32 ice_aq_to_link_speed[15] = {
SPEED_40000,
SPEED_50000,
SPEED_100000, /* BIT(10) */
- 0,
- 0,
- 0,
- 0 /* BIT(14) */
};
/**
@@ -5550,5 +5546,8 @@ static const u32 ice_aq_to_link_speed[15] = {
*/
u32 ice_get_link_speed(u16 index)
{
+ if (index >= ARRAY_SIZE(ice_aq_to_link_speed))
+ return 0;
+
return ice_aq_to_link_speed[index];
}
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
index 4f24d441c35e..0a55c552189a 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
@@ -441,7 +441,7 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked)
goto out;
}
- ice_pf_dcb_recfg(pf);
+ ice_pf_dcb_recfg(pf, false);
out:
/* enable previously downed VSIs */
@@ -731,12 +731,13 @@ static int ice_dcb_noncontig_cfg(struct ice_pf *pf)
/**
* ice_pf_dcb_recfg - Reconfigure all VEBs and VSIs
* @pf: pointer to the PF struct
+ * @locked: is adev device lock held
*
* Assumed caller has already disabled all VSIs before
* calling this function. Reconfiguring DCB based on
* local_dcbx_cfg.
*/
-void ice_pf_dcb_recfg(struct ice_pf *pf)
+void ice_pf_dcb_recfg(struct ice_pf *pf, bool locked)
{
struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
struct iidc_event *event;
@@ -783,14 +784,16 @@ void ice_pf_dcb_recfg(struct ice_pf *pf)
if (vsi->type == ICE_VSI_PF)
ice_dcbnl_set_all(vsi);
}
- /* Notify the AUX drivers that TC change is finished */
- event = kzalloc(sizeof(*event), GFP_KERNEL);
- if (!event)
- return;
+ if (!locked) {
+ /* Notify the AUX drivers that TC change is finished */
+ event = kzalloc(sizeof(*event), GFP_KERNEL);
+ if (!event)
+ return;
- set_bit(IIDC_EVENT_AFTER_TC_CHANGE, event->type);
- ice_send_event_to_aux(pf, event);
- kfree(event);
+ set_bit(IIDC_EVENT_AFTER_TC_CHANGE, event->type);
+ ice_send_event_to_aux(pf, event);
+ kfree(event);
+ }
}
/**
@@ -1044,7 +1047,7 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
}
/* changes in configuration update VSI */
- ice_pf_dcb_recfg(pf);
+ ice_pf_dcb_recfg(pf, false);
/* enable previously downed VSIs */
ice_dcb_ena_dis_vsi(pf, true, true);
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
index 4c421c842a13..800879a88c5e 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
@@ -23,7 +23,7 @@ u8 ice_dcb_get_tc(struct ice_vsi *vsi, int queue_index);
int
ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked);
int ice_dcb_bwchk(struct ice_pf *pf, struct ice_dcbx_cfg *dcbcfg);
-void ice_pf_dcb_recfg(struct ice_pf *pf);
+void ice_pf_dcb_recfg(struct ice_pf *pf, bool locked);
void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi);
int ice_init_pf_dcb(struct ice_pf *pf, bool locked);
void ice_update_dcb_stats(struct ice_pf *pf);
@@ -128,7 +128,7 @@ static inline u8 ice_get_pfc_mode(struct ice_pf *pf)
return 0;
}
-static inline void ice_pf_dcb_recfg(struct ice_pf *pf) { }
+static inline void ice_pf_dcb_recfg(struct ice_pf *pf, bool locked) { }
static inline void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi) { }
static inline void ice_update_dcb_stats(struct ice_pf *pf) { }
static inline void
diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c
index 8286e47b4bae..0fae0186bd85 100644
--- a/drivers/net/ethernet/intel/ice/ice_devlink.c
+++ b/drivers/net/ethernet/intel/ice/ice_devlink.c
@@ -899,7 +899,7 @@ static int ice_set_object_tx_priority(struct ice_port_info *pi, struct ice_sched
{
int status;
- if (node->tx_priority >= 8) {
+ if (priority >= 8) {
NL_SET_ERR_MSG_MOD(extack, "Priority should be less than 8");
return -EINVAL;
}
@@ -929,7 +929,7 @@ static int ice_set_object_tx_weight(struct ice_port_info *pi, struct ice_sched_n
{
int status;
- if (node->tx_weight > 200 || node->tx_weight < 1) {
+ if (weight > 200 || weight < 1) {
NL_SET_ERR_MSG_MOD(extack, "Weight must be between 1 and 200");
return -EINVAL;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 4191994d8f3a..a359f1610fc1 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -3641,7 +3641,9 @@ static int ice_set_channels(struct net_device *dev, struct ethtool_channels *ch)
struct ice_vsi *vsi = np->vsi;
struct ice_pf *pf = vsi->back;
int new_rx = 0, new_tx = 0;
+ bool locked = false;
u32 curr_combined;
+ int ret = 0;
/* do not support changing channels in Safe Mode */
if (ice_is_safe_mode(pf)) {
@@ -3705,15 +3707,33 @@ static int ice_set_channels(struct net_device *dev, struct ethtool_channels *ch)
return -EINVAL;
}
- ice_vsi_recfg_qs(vsi, new_rx, new_tx);
+ if (pf->adev) {
+ mutex_lock(&pf->adev_mutex);
+ device_lock(&pf->adev->dev);
+ locked = true;
+ if (pf->adev->dev.driver) {
+ netdev_err(dev, "Cannot change channels when RDMA is active\n");
+ ret = -EBUSY;
+ goto adev_unlock;
+ }
+ }
+
+ ice_vsi_recfg_qs(vsi, new_rx, new_tx, locked);
- if (!netif_is_rxfh_configured(dev))
- return ice_vsi_set_dflt_rss_lut(vsi, new_rx);
+ if (!netif_is_rxfh_configured(dev)) {
+ ret = ice_vsi_set_dflt_rss_lut(vsi, new_rx);
+ goto adev_unlock;
+ }
/* Update rss_size due to change in Rx queues */
vsi->rss_size = ice_get_valid_rss_size(&pf->hw, new_rx);
- return 0;
+adev_unlock:
+ if (locked) {
+ device_unlock(&pf->adev->dev);
+ mutex_unlock(&pf->adev_mutex);
+ }
+ return ret;
}
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.c b/drivers/net/ethernet/intel/ice/ice_gnss.c
index b5a7f246d230..43e199b5b513 100644
--- a/drivers/net/ethernet/intel/ice/ice_gnss.c
+++ b/drivers/net/ethernet/intel/ice/ice_gnss.c
@@ -363,6 +363,7 @@ ice_gnss_tty_write(struct tty_struct *tty, const unsigned char *buf, int count)
/* Send the data out to a hardware port */
write_buf = kzalloc(sizeof(*write_buf), GFP_KERNEL);
if (!write_buf) {
+ kfree(cmd_buf);
err = -ENOMEM;
goto exit;
}
@@ -460,6 +461,9 @@ static struct tty_driver *ice_gnss_create_tty_driver(struct ice_pf *pf)
for (i = 0; i < ICE_GNSS_TTY_MINOR_DEVICES; i++) {
pf->gnss_tty_port[i] = kzalloc(sizeof(*pf->gnss_tty_port[i]),
GFP_KERNEL);
+ if (!pf->gnss_tty_port[i])
+ goto err_out;
+
pf->gnss_serial[i] = NULL;
tty_port_init(pf->gnss_tty_port[i]);
@@ -469,21 +473,23 @@ static struct tty_driver *ice_gnss_create_tty_driver(struct ice_pf *pf)
err = tty_register_driver(tty_driver);
if (err) {
dev_err(dev, "Failed to register TTY driver err=%d\n", err);
-
- for (i = 0; i < ICE_GNSS_TTY_MINOR_DEVICES; i++) {
- tty_port_destroy(pf->gnss_tty_port[i]);
- kfree(pf->gnss_tty_port[i]);
- }
- kfree(ttydrv_name);
- tty_driver_kref_put(pf->ice_gnss_tty_driver);
-
- return NULL;
+ goto err_out;
}
for (i = 0; i < ICE_GNSS_TTY_MINOR_DEVICES; i++)
dev_info(dev, "%s%d registered\n", ttydrv_name, i);
return tty_driver;
+
+err_out:
+ while (i--) {
+ tty_port_destroy(pf->gnss_tty_port[i]);
+ kfree(pf->gnss_tty_port[i]);
+ }
+ kfree(ttydrv_name);
+ tty_driver_kref_put(pf->ice_gnss_tty_driver);
+
+ return NULL;
}
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 94aa834cd9a6..a596e07b3ce9 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -3235,9 +3235,6 @@ int ice_vsi_release(struct ice_vsi *vsi)
}
}
- if (vsi->type == ICE_VSI_PF)
- ice_devlink_destroy_pf_port(pf);
-
if (vsi->type == ICE_VSI_VF &&
vsi->agg_node && vsi->agg_node->valid)
vsi->agg_node->num_vsis--;
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index a9a7f8b52140..8ec24f6cf6be 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -275,6 +275,8 @@ static int ice_set_promisc(struct ice_vsi *vsi, u8 promisc_m)
if (status && status != -EEXIST)
return status;
+ netdev_dbg(vsi->netdev, "set promisc filter bits for VSI %i: 0x%x\n",
+ vsi->vsi_num, promisc_m);
return 0;
}
@@ -300,6 +302,8 @@ static int ice_clear_promisc(struct ice_vsi *vsi, u8 promisc_m)
promisc_m, 0);
}
+ netdev_dbg(vsi->netdev, "clear promisc filter bits for VSI %i: 0x%x\n",
+ vsi->vsi_num, promisc_m);
return status;
}
@@ -414,6 +418,16 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
}
err = 0;
vlan_ops->dis_rx_filtering(vsi);
+
+ /* promiscuous mode implies allmulticast so
+ * that VSIs that are in promiscuous mode are
+ * subscribed to multicast packets coming to
+ * the port
+ */
+ err = ice_set_promisc(vsi,
+ ICE_MCAST_PROMISC_BITS);
+ if (err)
+ goto out_promisc;
}
} else {
/* Clear Rx filter to remove traffic from wire */
@@ -430,6 +444,18 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
NETIF_F_HW_VLAN_CTAG_FILTER)
vlan_ops->ena_rx_filtering(vsi);
}
+
+ /* disable allmulti here, but only if allmulti is not
+ * still enabled for the netdev
+ */
+ if (!(vsi->current_netdev_flags & IFF_ALLMULTI)) {
+ err = ice_clear_promisc(vsi,
+ ICE_MCAST_PROMISC_BITS);
+ if (err) {
+ netdev_err(netdev, "Error %d clearing multicast promiscuous on VSI %i\n",
+ err, vsi->vsi_num);
+ }
+ }
}
}
goto exit;
@@ -4195,12 +4221,13 @@ bool ice_is_wol_supported(struct ice_hw *hw)
* @vsi: VSI being changed
* @new_rx: new number of Rx queues
* @new_tx: new number of Tx queues
+ * @locked: is adev device_lock held
*
* Only change the number of queues if new_tx, or new_rx is non-0.
*
* Returns 0 on success.
*/
-int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx)
+int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx, bool locked)
{
struct ice_pf *pf = vsi->back;
int err = 0, timeout = 50;
@@ -4229,7 +4256,7 @@ int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx)
ice_vsi_close(vsi);
ice_vsi_rebuild(vsi, false);
- ice_pf_dcb_recfg(pf);
+ ice_pf_dcb_recfg(pf, locked);
ice_vsi_open(vsi);
done:
clear_bit(ICE_CFG_BUSY, pf->state);
@@ -4590,7 +4617,7 @@ static void ice_print_wake_reason(struct ice_pf *pf)
}
/**
- * ice_register_netdev - register netdev and devlink port
+ * ice_register_netdev - register netdev
* @pf: pointer to the PF struct
*/
static int ice_register_netdev(struct ice_pf *pf)
@@ -4602,11 +4629,6 @@ static int ice_register_netdev(struct ice_pf *pf)
if (!vsi || !vsi->netdev)
return -EIO;
- err = ice_devlink_create_pf_port(pf);
- if (err)
- goto err_devlink_create;
-
- SET_NETDEV_DEVLINK_PORT(vsi->netdev, &pf->devlink_port);
err = register_netdev(vsi->netdev);
if (err)
goto err_register_netdev;
@@ -4617,8 +4639,6 @@ static int ice_register_netdev(struct ice_pf *pf)
return 0;
err_register_netdev:
- ice_devlink_destroy_pf_port(pf);
-err_devlink_create:
free_netdev(vsi->netdev);
vsi->netdev = NULL;
clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state);
@@ -4636,6 +4656,7 @@ static int
ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
{
struct device *dev = &pdev->dev;
+ struct ice_vsi *vsi;
struct ice_pf *pf;
struct ice_hw *hw;
int i, err;
@@ -4918,6 +4939,18 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
pcie_print_link_status(pf->pdev);
probe_done:
+ err = ice_devlink_create_pf_port(pf);
+ if (err)
+ goto err_create_pf_port;
+
+ vsi = ice_get_main_vsi(pf);
+ if (!vsi || !vsi->netdev) {
+ err = -EINVAL;
+ goto err_netdev_reg;
+ }
+
+ SET_NETDEV_DEVLINK_PORT(vsi->netdev, &pf->devlink_port);
+
err = ice_register_netdev(pf);
if (err)
goto err_netdev_reg;
@@ -4955,6 +4988,8 @@ err_init_aux_unroll:
err_devlink_reg_param:
ice_devlink_unregister_params(pf);
err_netdev_reg:
+ ice_devlink_destroy_pf_port(pf);
+err_create_pf_port:
err_send_version_unroll:
ice_vsi_release_all(pf);
err_alloc_sw_unroll:
@@ -5083,6 +5118,7 @@ static void ice_remove(struct pci_dev *pdev)
ice_setup_mc_magic_wake(pf);
ice_vsi_release_all(pf);
mutex_destroy(&(&pf->hw)->fdir_fltr_lock);
+ ice_devlink_destroy_pf_port(pf);
ice_set_wake(pf);
ice_free_irq_msix_misc(pf);
ice_for_each_vsi(pf, i) {
@@ -5531,7 +5567,7 @@ static int __init ice_module_init(void)
pr_info("%s\n", ice_driver_string);
pr_info("%s\n", ice_copyright);
- ice_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, KBUILD_MODNAME);
+ ice_wq = alloc_workqueue("%s", 0, 0, KBUILD_MODNAME);
if (!ice_wq) {
pr_err("Failed to create workqueue\n");
return -ENOMEM;
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index 9b762f7972ce..61f844d22512 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -5420,7 +5420,7 @@ ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
*/
status = ice_add_special_words(rinfo, lkup_exts, ice_is_dvm_ena(hw));
if (status)
- goto err_free_lkup_exts;
+ goto err_unroll;
/* Group match words into recipes using preferred recipe grouping
* criteria.
diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c
index faba0f857cd9..95f392ab9670 100644
--- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c
@@ -1681,7 +1681,7 @@ ice_tc_forward_to_queue(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr,
struct ice_vsi *ch_vsi = NULL;
u16 queue = act->rx_queue;
- if (queue > vsi->num_rxq) {
+ if (queue >= vsi->num_rxq) {
NL_SET_ERR_MSG_MOD(fltr->extack,
"Unable to add filter because specified queue is invalid");
return -EINVAL;
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_mbx.c b/drivers/net/ethernet/intel/ice/ice_vf_mbx.c
index d4a4001b6e5d..f56fa94ff3d0 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_mbx.c
+++ b/drivers/net/ethernet/intel/ice/ice_vf_mbx.c
@@ -39,7 +39,7 @@ ice_aq_send_msg_to_vf(struct ice_hw *hw, u16 vfid, u32 v_opcode, u32 v_retval,
return ice_sq_send_cmd(hw, &hw->mailboxq, &desc, msg, msglen, cd);
}
-static const u32 ice_legacy_aq_to_vc_speed[15] = {
+static const u32 ice_legacy_aq_to_vc_speed[] = {
VIRTCHNL_LINK_SPEED_100MB, /* BIT(0) */
VIRTCHNL_LINK_SPEED_100MB,
VIRTCHNL_LINK_SPEED_1GB,
@@ -51,10 +51,6 @@ static const u32 ice_legacy_aq_to_vc_speed[15] = {
VIRTCHNL_LINK_SPEED_40GB,
VIRTCHNL_LINK_SPEED_40GB,
VIRTCHNL_LINK_SPEED_40GB,
- VIRTCHNL_LINK_SPEED_UNKNOWN,
- VIRTCHNL_LINK_SPEED_UNKNOWN,
- VIRTCHNL_LINK_SPEED_UNKNOWN,
- VIRTCHNL_LINK_SPEED_UNKNOWN /* BIT(14) */
};
/**
@@ -71,21 +67,20 @@ static const u32 ice_legacy_aq_to_vc_speed[15] = {
*/
u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed)
{
- u32 speed;
+ /* convert a BIT() value into an array index */
+ u32 index = fls(link_speed) - 1;
- if (adv_link_support) {
- /* convert a BIT() value into an array index */
- speed = ice_get_link_speed(fls(link_speed) - 1);
- } else {
+ if (adv_link_support)
+ return ice_get_link_speed(index);
+ else if (index < ARRAY_SIZE(ice_legacy_aq_to_vc_speed))
/* Virtchnl speeds are not defined for every speed supported in
* the hardware. To maintain compatibility with older AVF
* drivers, while reporting the speed the new speed values are
* resolved to the closest known virtchnl speeds
*/
- speed = ice_legacy_aq_to_vc_speed[fls(link_speed) - 1];
- }
+ return ice_legacy_aq_to_vc_speed[index];
- return speed;
+ return VIRTCHNL_LINK_SPEED_UNKNOWN;
}
/* The mailbox overflow detection algorithm helps to check if there
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c
index 5ecc0ee9a78e..b1ffb81893d4 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c
+++ b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c
@@ -44,13 +44,17 @@ void ice_vf_vsi_init_vlan_ops(struct ice_vsi *vsi)
/* outer VLAN ops regardless of port VLAN config */
vlan_ops->add_vlan = ice_vsi_add_vlan;
- vlan_ops->dis_rx_filtering = ice_vsi_dis_rx_vlan_filtering;
vlan_ops->ena_tx_filtering = ice_vsi_ena_tx_vlan_filtering;
vlan_ops->dis_tx_filtering = ice_vsi_dis_tx_vlan_filtering;
if (ice_vf_is_port_vlan_ena(vf)) {
/* setup outer VLAN ops */
vlan_ops->set_port_vlan = ice_vsi_set_outer_port_vlan;
+ /* all Rx traffic should be in the domain of the
+ * assigned port VLAN, so prevent disabling Rx VLAN
+ * filtering
+ */
+ vlan_ops->dis_rx_filtering = noop_vlan;
vlan_ops->ena_rx_filtering =
ice_vsi_ena_rx_vlan_filtering;
@@ -63,6 +67,9 @@ void ice_vf_vsi_init_vlan_ops(struct ice_vsi *vsi)
vlan_ops->ena_insertion = ice_vsi_ena_inner_insertion;
vlan_ops->dis_insertion = ice_vsi_dis_inner_insertion;
} else {
+ vlan_ops->dis_rx_filtering =
+ ice_vsi_dis_rx_vlan_filtering;
+
if (!test_bit(ICE_FLAG_VF_VLAN_PRUNING, pf->flags))
vlan_ops->ena_rx_filtering = noop_vlan;
else
@@ -96,7 +103,14 @@ void ice_vf_vsi_init_vlan_ops(struct ice_vsi *vsi)
vlan_ops->set_port_vlan = ice_vsi_set_inner_port_vlan;
vlan_ops->ena_rx_filtering =
ice_vsi_ena_rx_vlan_filtering;
+ /* all Rx traffic should be in the domain of the
+ * assigned port VLAN, so prevent disabling Rx VLAN
+ * filtering
+ */
+ vlan_ops->dis_rx_filtering = noop_vlan;
} else {
+ vlan_ops->dis_rx_filtering =
+ ice_vsi_dis_rx_vlan_filtering;
if (!test_bit(ICE_FLAG_VF_VLAN_PRUNING, pf->flags))
vlan_ops->ena_rx_filtering = noop_vlan;
else
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 907055b77af0..374b7f10b549 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -783,7 +783,7 @@ construct_skb:
static void
ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf)
{
- xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf);
+ page_frag_free(tx_buf->raw_buf);
xdp_ring->xdp_tx_active--;
dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma),
dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
@@ -800,6 +800,7 @@ static void ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring)
struct ice_tx_desc *tx_desc;
u16 cnt = xdp_ring->count;
struct ice_tx_buf *tx_buf;
+ u16 completed_frames = 0;
u16 xsk_frames = 0;
u16 last_rs;
int i;
@@ -809,19 +810,21 @@ static void ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring)
if ((tx_desc->cmd_type_offset_bsz &
cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) {
if (last_rs >= ntc)
- xsk_frames = last_rs - ntc + 1;
+ completed_frames = last_rs - ntc + 1;
else
- xsk_frames = last_rs + cnt - ntc + 1;
+ completed_frames = last_rs + cnt - ntc + 1;
}
- if (!xsk_frames)
+ if (!completed_frames)
return;
- if (likely(!xdp_ring->xdp_tx_active))
+ if (likely(!xdp_ring->xdp_tx_active)) {
+ xsk_frames = completed_frames;
goto skip;
+ }
ntc = xdp_ring->next_to_clean;
- for (i = 0; i < xsk_frames; i++) {
+ for (i = 0; i < completed_frames; i++) {
tx_buf = &xdp_ring->tx_buf[ntc];
if (tx_buf->raw_buf) {
@@ -837,7 +840,7 @@ static void ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring)
}
skip:
tx_desc->cmd_type_offset_bsz = 0;
- xdp_ring->next_to_clean += xsk_frames;
+ xdp_ring->next_to_clean += completed_frames;
if (xdp_ring->next_to_clean >= cnt)
xdp_ring->next_to_clean -= cnt;
if (xsk_frames)
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 3c0c35ecea10..b5b443883da9 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -2256,6 +2256,30 @@ static void igb_enable_mas(struct igb_adapter *adapter)
}
}
+#ifdef CONFIG_IGB_HWMON
+/**
+ * igb_set_i2c_bb - Init I2C interface
+ * @hw: pointer to hardware structure
+ **/
+static void igb_set_i2c_bb(struct e1000_hw *hw)
+{
+ u32 ctrl_ext;
+ s32 i2cctl;
+
+ ctrl_ext = rd32(E1000_CTRL_EXT);
+ ctrl_ext |= E1000_CTRL_I2C_ENA;
+ wr32(E1000_CTRL_EXT, ctrl_ext);
+ wrfl();
+
+ i2cctl = rd32(E1000_I2CPARAMS);
+ i2cctl |= E1000_I2CBB_EN
+ | E1000_I2C_CLK_OE_N
+ | E1000_I2C_DATA_OE_N;
+ wr32(E1000_I2CPARAMS, i2cctl);
+ wrfl();
+}
+#endif
+
void igb_reset(struct igb_adapter *adapter)
{
struct pci_dev *pdev = adapter->pdev;
@@ -2400,7 +2424,8 @@ void igb_reset(struct igb_adapter *adapter)
* interface.
*/
if (adapter->ets)
- mac->ops.init_thermal_sensor_thresh(hw);
+ igb_set_i2c_bb(hw);
+ mac->ops.init_thermal_sensor_thresh(hw);
}
}
#endif
@@ -3117,21 +3142,12 @@ static void igb_init_mas(struct igb_adapter *adapter)
**/
static s32 igb_init_i2c(struct igb_adapter *adapter)
{
- struct e1000_hw *hw = &adapter->hw;
s32 status = 0;
- s32 i2cctl;
/* I2C interface supported on i350 devices */
if (adapter->hw.mac.type != e1000_i350)
return 0;
- i2cctl = rd32(E1000_I2CPARAMS);
- i2cctl |= E1000_I2CBB_EN
- | E1000_I2C_CLK_OUT | E1000_I2C_CLK_OE_N
- | E1000_I2C_DATA_OUT | E1000_I2C_DATA_OE_N;
- wr32(E1000_I2CPARAMS, i2cctl);
- wrfl();
-
/* Initialize the i2c bus which is controlled by the registers.
* This bus will use the i2c_algo_bit structure that implements
* the protocol through toggling of the 4 bits in the register.
@@ -3521,6 +3537,12 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
adapter->ets = true;
else
adapter->ets = false;
+ /* Only enable I2C bit banging if an external thermal
+ * sensor is supported.
+ */
+ if (adapter->ets)
+ igb_set_i2c_bb(hw);
+ hw->mac.ops.init_thermal_sensor_thresh(hw);
if (igb_sysfs_init(adapter))
dev_err(&pdev->dev,
"failed to allocate sysfs resources\n");
@@ -6794,7 +6816,7 @@ static void igb_perout(struct igb_adapter *adapter, int tsintr_tt)
struct timespec64 ts;
u32 tsauxc;
- if (pin < 0 || pin >= IGB_N_PEROUT)
+ if (pin < 0 || pin >= IGB_N_SDP)
return;
spin_lock(&adapter->tmreg_lock);
@@ -6802,7 +6824,7 @@ static void igb_perout(struct igb_adapter *adapter, int tsintr_tt)
if (hw->mac.type == e1000_82580 ||
hw->mac.type == e1000_i354 ||
hw->mac.type == e1000_i350) {
- s64 ns = timespec64_to_ns(&adapter->perout[pin].period);
+ s64 ns = timespec64_to_ns(&adapter->perout[tsintr_tt].period);
u32 systiml, systimh, level_mask, level, rem;
u64 systim, now;
@@ -6850,8 +6872,8 @@ static void igb_perout(struct igb_adapter *adapter, int tsintr_tt)
ts.tv_nsec = (u32)systim;
ts.tv_sec = ((u32)(systim >> 32)) & 0xFF;
} else {
- ts = timespec64_add(adapter->perout[pin].start,
- adapter->perout[pin].period);
+ ts = timespec64_add(adapter->perout[tsintr_tt].start,
+ adapter->perout[tsintr_tt].period);
}
/* u32 conversion of tv_sec is safe until y2106 */
@@ -6860,7 +6882,7 @@ static void igb_perout(struct igb_adapter *adapter, int tsintr_tt)
tsauxc = rd32(E1000_TSAUXC);
tsauxc |= TSAUXC_EN_TT0;
wr32(E1000_TSAUXC, tsauxc);
- adapter->perout[pin].start = ts;
+ adapter->perout[tsintr_tt].start = ts;
spin_unlock(&adapter->tmreg_lock);
}
@@ -6874,7 +6896,7 @@ static void igb_extts(struct igb_adapter *adapter, int tsintr_tt)
struct ptp_clock_event event;
struct timespec64 ts;
- if (pin < 0 || pin >= IGB_N_EXTTS)
+ if (pin < 0 || pin >= IGB_N_SDP)
return;
if (hw->mac.type == e1000_82580 ||
diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index a7b22639cfcd..e9747ec5ac0b 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -475,7 +475,9 @@
#define IGC_TSAUXC_EN_TT0 BIT(0) /* Enable target time 0. */
#define IGC_TSAUXC_EN_TT1 BIT(1) /* Enable target time 1. */
#define IGC_TSAUXC_EN_CLK0 BIT(2) /* Enable Configurable Frequency Clock 0. */
+#define IGC_TSAUXC_ST0 BIT(4) /* Start Clock 0 Toggle on Target Time 0. */
#define IGC_TSAUXC_EN_CLK1 BIT(5) /* Enable Configurable Frequency Clock 1. */
+#define IGC_TSAUXC_ST1 BIT(7) /* Start Clock 1 Toggle on Target Time 1. */
#define IGC_TSAUXC_EN_TS0 BIT(8) /* Enable hardware timestamp 0. */
#define IGC_TSAUXC_AUTT0 BIT(9) /* Auxiliary Timestamp Taken. */
#define IGC_TSAUXC_EN_TS1 BIT(10) /* Enable hardware timestamp 0. */
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 44b1740dc098..1dd2a7fee8d4 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2942,7 +2942,9 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
if (tx_buffer->next_to_watch &&
time_after(jiffies, tx_buffer->time_stamp +
(adapter->tx_timeout_factor * HZ)) &&
- !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF)) {
+ !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF) &&
+ (rd32(IGC_TDH(tx_ring->reg_idx)) !=
+ readl(tx_ring->tail))) {
/* detected Tx unit hang */
netdev_err(tx_ring->netdev,
"Detected Tx Unit Hang\n"
@@ -5069,6 +5071,24 @@ static int igc_change_mtu(struct net_device *netdev, int new_mtu)
}
/**
+ * igc_tx_timeout - Respond to a Tx Hang
+ * @netdev: network interface device structure
+ * @txqueue: queue number that timed out
+ **/
+static void igc_tx_timeout(struct net_device *netdev,
+ unsigned int __always_unused txqueue)
+{
+ struct igc_adapter *adapter = netdev_priv(netdev);
+ struct igc_hw *hw = &adapter->hw;
+
+ /* Do the reset outside of interrupt context */
+ adapter->tx_timeout_count++;
+ schedule_work(&adapter->reset_task);
+ wr32(IGC_EICS,
+ (adapter->eims_enable_mask & ~adapter->eims_other));
+}
+
+/**
* igc_get_stats64 - Get System Network Statistics
* @netdev: network interface device structure
* @stats: rtnl_link_stats64 pointer
@@ -5495,7 +5515,7 @@ static void igc_watchdog_task(struct work_struct *work)
case SPEED_100:
case SPEED_1000:
case SPEED_2500:
- adapter->tx_timeout_factor = 7;
+ adapter->tx_timeout_factor = 1;
break;
}
@@ -6320,6 +6340,7 @@ static const struct net_device_ops igc_netdev_ops = {
.ndo_set_rx_mode = igc_set_rx_mode,
.ndo_set_mac_address = igc_set_mac,
.ndo_change_mtu = igc_change_mtu,
+ .ndo_tx_timeout = igc_tx_timeout,
.ndo_get_stats64 = igc_get_stats64,
.ndo_fix_features = igc_fix_features,
.ndo_set_features = igc_set_features,
diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c
index 8dbb9f903ca7..4e10ced736db 100644
--- a/drivers/net/ethernet/intel/igc/igc_ptp.c
+++ b/drivers/net/ethernet/intel/igc/igc_ptp.c
@@ -322,7 +322,7 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp,
ts = ns_to_timespec64(ns);
if (rq->perout.index == 1) {
if (use_freq) {
- tsauxc_mask = IGC_TSAUXC_EN_CLK1;
+ tsauxc_mask = IGC_TSAUXC_EN_CLK1 | IGC_TSAUXC_ST1;
tsim_mask = 0;
} else {
tsauxc_mask = IGC_TSAUXC_EN_TT1;
@@ -333,7 +333,7 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp,
freqout = IGC_FREQOUT1;
} else {
if (use_freq) {
- tsauxc_mask = IGC_TSAUXC_EN_CLK0;
+ tsauxc_mask = IGC_TSAUXC_EN_CLK0 | IGC_TSAUXC_ST0;
tsim_mask = 0;
} else {
tsauxc_mask = IGC_TSAUXC_EN_TT0;
@@ -347,10 +347,12 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp,
tsauxc = rd32(IGC_TSAUXC);
tsim = rd32(IGC_TSIM);
if (rq->perout.index == 1) {
- tsauxc &= ~(IGC_TSAUXC_EN_TT1 | IGC_TSAUXC_EN_CLK1);
+ tsauxc &= ~(IGC_TSAUXC_EN_TT1 | IGC_TSAUXC_EN_CLK1 |
+ IGC_TSAUXC_ST1);
tsim &= ~IGC_TSICR_TT1;
} else {
- tsauxc &= ~(IGC_TSAUXC_EN_TT0 | IGC_TSAUXC_EN_CLK0);
+ tsauxc &= ~(IGC_TSAUXC_EN_TT0 | IGC_TSAUXC_EN_CLK0 |
+ IGC_TSAUXC_ST0);
tsim &= ~IGC_TSICR_TT0;
}
if (on) {
@@ -415,10 +417,12 @@ static int igc_ptp_verify_pin(struct ptp_clock_info *ptp, unsigned int pin,
*
* We need to convert the system time value stored in the RX/TXSTMP registers
* into a hwtstamp which can be used by the upper level timestamping functions.
+ *
+ * Returns 0 on success.
**/
-static void igc_ptp_systim_to_hwtstamp(struct igc_adapter *adapter,
- struct skb_shared_hwtstamps *hwtstamps,
- u64 systim)
+static int igc_ptp_systim_to_hwtstamp(struct igc_adapter *adapter,
+ struct skb_shared_hwtstamps *hwtstamps,
+ u64 systim)
{
switch (adapter->hw.mac.type) {
case igc_i225:
@@ -428,8 +432,9 @@ static void igc_ptp_systim_to_hwtstamp(struct igc_adapter *adapter,
systim & 0xFFFFFFFF);
break;
default:
- break;
+ return -EINVAL;
}
+ return 0;
}
/**
@@ -650,7 +655,8 @@ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter)
regval = rd32(IGC_TXSTMPL);
regval |= (u64)rd32(IGC_TXSTMPH) << 32;
- igc_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
+ if (igc_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval))
+ return;
switch (adapter->link_speed) {
case SPEED_10:
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index bc68b8f2176d..8736ca4b2628 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -73,6 +73,8 @@
#define IXGBE_RXBUFFER_4K 4096
#define IXGBE_MAX_RXBUFFER 16384 /* largest size for a single descriptor */
+#define IXGBE_PKT_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2))
+
/* Attempt to maximize the headroom available for incoming frames. We
* use a 2K buffer for receives and need 1536/1534 to store the data for
* the frame. This leaves us with 512 bytes of room. From that we need
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index ab8370c413f3..4507fba8747a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -6778,6 +6778,18 @@ static void ixgbe_free_all_rx_resources(struct ixgbe_adapter *adapter)
}
/**
+ * ixgbe_max_xdp_frame_size - returns the maximum allowed frame size for XDP
+ * @adapter: device handle, pointer to adapter
+ */
+static int ixgbe_max_xdp_frame_size(struct ixgbe_adapter *adapter)
+{
+ if (PAGE_SIZE >= 8192 || adapter->flags2 & IXGBE_FLAG2_RX_LEGACY)
+ return IXGBE_RXBUFFER_2K;
+ else
+ return IXGBE_RXBUFFER_3K;
+}
+
+/**
* ixgbe_change_mtu - Change the Maximum Transfer Unit
* @netdev: network interface device structure
* @new_mtu: new value for maximum frame size
@@ -6788,18 +6800,12 @@ static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu)
{
struct ixgbe_adapter *adapter = netdev_priv(netdev);
- if (adapter->xdp_prog) {
- int new_frame_size = new_mtu + ETH_HLEN + ETH_FCS_LEN +
- VLAN_HLEN;
- int i;
-
- for (i = 0; i < adapter->num_rx_queues; i++) {
- struct ixgbe_ring *ring = adapter->rx_ring[i];
+ if (ixgbe_enabled_xdp_adapter(adapter)) {
+ int new_frame_size = new_mtu + IXGBE_PKT_HDR_PAD;
- if (new_frame_size > ixgbe_rx_bufsz(ring)) {
- e_warn(probe, "Requested MTU size is not supported with XDP\n");
- return -EINVAL;
- }
+ if (new_frame_size > ixgbe_max_xdp_frame_size(adapter)) {
+ e_warn(probe, "Requested MTU size is not supported with XDP\n");
+ return -EINVAL;
}
}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
index 24aa97f993ca..123dca9ce468 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
@@ -855,9 +855,11 @@ static struct pci_dev *ixgbe_get_first_secondary_devfn(unsigned int devfn)
rp_pdev = pci_get_domain_bus_and_slot(0, 0, devfn);
if (rp_pdev && rp_pdev->subordinate) {
bus = rp_pdev->subordinate->number;
+ pci_dev_put(rp_pdev);
return pci_get_domain_bus_and_slot(0, bus, 0);
}
+ pci_dev_put(rp_pdev);
return NULL;
}
@@ -874,6 +876,7 @@ static bool ixgbe_x550em_a_has_mii(struct ixgbe_hw *hw)
struct ixgbe_adapter *adapter = hw->back;
struct pci_dev *pdev = adapter->pdev;
struct pci_dev *func0_pdev;
+ bool has_mii = false;
/* For the C3000 family of SoCs (x550em_a) the internal ixgbe devices
* are always downstream of root ports @ 0000:00:16.0 & 0000:00:17.0
@@ -884,15 +887,16 @@ static bool ixgbe_x550em_a_has_mii(struct ixgbe_hw *hw)
func0_pdev = ixgbe_get_first_secondary_devfn(PCI_DEVFN(0x16, 0));
if (func0_pdev) {
if (func0_pdev == pdev)
- return true;
- else
- return false;
+ has_mii = true;
+ goto out;
}
func0_pdev = ixgbe_get_first_secondary_devfn(PCI_DEVFN(0x17, 0));
if (func0_pdev == pdev)
- return true;
+ has_mii = true;
- return false;
+out:
+ pci_dev_put(func0_pdev);
+ return has_mii;
}
/**
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
index b2b71fe80d61..724df6398bbe 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
@@ -774,9 +774,9 @@ int cgx_lmac_rx_tx_enable(void *cgxd, int lmac_id, bool enable)
cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_CFG);
if (enable)
- cfg |= CMR_EN | DATA_PKT_RX_EN | DATA_PKT_TX_EN;
+ cfg |= DATA_PKT_RX_EN | DATA_PKT_TX_EN;
else
- cfg &= ~(CMR_EN | DATA_PKT_RX_EN | DATA_PKT_TX_EN);
+ cfg &= ~(DATA_PKT_RX_EN | DATA_PKT_TX_EN);
cgx_write(cgx, lmac_id, CGXX_CMRX_CFG, cfg);
return 0;
}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
index fb2d37676d84..5a20d93004c7 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
@@ -26,7 +26,6 @@
#define CMR_P2X_SEL_SHIFT 59ULL
#define CMR_P2X_SEL_NIX0 1ULL
#define CMR_P2X_SEL_NIX1 2ULL
-#define CMR_EN BIT_ULL(55)
#define DATA_PKT_TX_EN BIT_ULL(53)
#define DATA_PKT_RX_EN BIT_ULL(54)
#define CGX_LMAC_TYPE_SHIFT 40
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c b/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c
index fa8029a94068..eb25e458266c 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c
@@ -589,7 +589,7 @@ int rvu_mbox_handler_mcs_free_resources(struct rvu *rvu,
u16 pcifunc = req->hdr.pcifunc;
struct mcs_rsrc_map *map;
struct mcs *mcs;
- int rc;
+ int rc = 0;
if (req->mcs_id >= rvu->mcs_blk_cnt)
return MCS_AF_ERR_INVALID_MCSID;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c
index bda1a6fa2ec4..e4407f09c9d3 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c
@@ -1500,6 +1500,9 @@ static const struct devlink_param rvu_af_dl_params[] = {
BIT(DEVLINK_PARAM_CMODE_RUNTIME),
rvu_af_dl_dwrr_mtu_get, rvu_af_dl_dwrr_mtu_set,
rvu_af_dl_dwrr_mtu_validate),
+};
+
+static const struct devlink_param rvu_af_dl_param_exact_match[] = {
DEVLINK_PARAM_DRIVER(RVU_AF_DEVLINK_PARAM_ID_NPC_EXACT_FEATURE_DISABLE,
"npc_exact_feature_disable", DEVLINK_PARAM_TYPE_STRING,
BIT(DEVLINK_PARAM_CMODE_RUNTIME),
@@ -1556,7 +1559,6 @@ int rvu_register_dl(struct rvu *rvu)
{
struct rvu_devlink *rvu_dl;
struct devlink *dl;
- size_t size;
int err;
dl = devlink_alloc(&rvu_devlink_ops, sizeof(struct rvu_devlink),
@@ -1578,21 +1580,32 @@ int rvu_register_dl(struct rvu *rvu)
goto err_dl_health;
}
+ err = devlink_params_register(dl, rvu_af_dl_params, ARRAY_SIZE(rvu_af_dl_params));
+ if (err) {
+ dev_err(rvu->dev,
+ "devlink params register failed with error %d", err);
+ goto err_dl_health;
+ }
+
/* Register exact match devlink only for CN10K-B */
- size = ARRAY_SIZE(rvu_af_dl_params);
if (!rvu_npc_exact_has_match_table(rvu))
- size -= 1;
+ goto done;
- err = devlink_params_register(dl, rvu_af_dl_params, size);
+ err = devlink_params_register(dl, rvu_af_dl_param_exact_match,
+ ARRAY_SIZE(rvu_af_dl_param_exact_match));
if (err) {
dev_err(rvu->dev,
- "devlink params register failed with error %d", err);
- goto err_dl_health;
+ "devlink exact match params register failed with error %d", err);
+ goto err_dl_exact_match;
}
+done:
devlink_register(dl);
return 0;
+err_dl_exact_match:
+ devlink_params_unregister(dl, rvu_af_dl_params, ARRAY_SIZE(rvu_af_dl_params));
+
err_dl_health:
rvu_health_reporters_destroy(rvu);
devlink_free(dl);
@@ -1605,8 +1618,14 @@ void rvu_unregister_dl(struct rvu *rvu)
struct devlink *dl = rvu_dl->dl;
devlink_unregister(dl);
- devlink_params_unregister(dl, rvu_af_dl_params,
- ARRAY_SIZE(rvu_af_dl_params));
+
+ devlink_params_unregister(dl, rvu_af_dl_params, ARRAY_SIZE(rvu_af_dl_params));
+
+ /* Unregister exact match devlink only for CN10K-B */
+ if (rvu_npc_exact_has_match_table(rvu))
+ devlink_params_unregister(dl, rvu_af_dl_param_exact_match,
+ ARRAY_SIZE(rvu_af_dl_param_exact_match));
+
rvu_health_reporters_destroy(rvu);
devlink_free(dl);
}
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
index 9e10e7471b88..8a41ad8ca04f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
@@ -1376,18 +1376,23 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf)
sq = &qset->sq[qidx];
sq->sqb_count = 0;
sq->sqb_ptrs = kcalloc(num_sqbs, sizeof(*sq->sqb_ptrs), GFP_KERNEL);
- if (!sq->sqb_ptrs)
- return -ENOMEM;
+ if (!sq->sqb_ptrs) {
+ err = -ENOMEM;
+ goto err_mem;
+ }
for (ptr = 0; ptr < num_sqbs; ptr++) {
- if (otx2_alloc_rbuf(pfvf, pool, &bufptr))
- return -ENOMEM;
+ err = otx2_alloc_rbuf(pfvf, pool, &bufptr);
+ if (err)
+ goto err_mem;
pfvf->hw_ops->aura_freeptr(pfvf, pool_id, bufptr);
sq->sqb_ptrs[sq->sqb_count++] = (u64)bufptr;
}
}
- return 0;
+err_mem:
+ return err ? -ENOMEM : 0;
+
fail:
otx2_mbox_reset(&pfvf->mbox.mbox, 0);
otx2_aura_pool_free(pfvf);
@@ -1430,13 +1435,13 @@ int otx2_rq_aura_pool_init(struct otx2_nic *pfvf)
for (pool_id = 0; pool_id < hw->rqpool_cnt; pool_id++) {
pool = &pfvf->qset.pool[pool_id];
for (ptr = 0; ptr < num_ptrs; ptr++) {
- if (otx2_alloc_rbuf(pfvf, pool, &bufptr))
+ err = otx2_alloc_rbuf(pfvf, pool, &bufptr);
+ if (err)
return -ENOMEM;
pfvf->hw_ops->aura_freeptr(pfvf, pool_id,
bufptr + OTX2_HEAD_ROOM);
}
}
-
return 0;
fail:
otx2_mbox_reset(&pfvf->mbox.mbox, 0);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
index 5bee3c3a7ce4..3d22cc6a2804 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
@@ -736,8 +736,10 @@ static inline void cn10k_aura_freeptr(void *dev, int aura, u64 buf)
u64 ptrs[2];
ptrs[1] = buf;
+ get_cpu();
/* Free only one buffer at time during init and teardown */
__cn10k_aura_freeptr(pfvf, aura, ptrs, 2);
+ put_cpu();
}
/* Alloc pointer from pool/aura */
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
index 86653bb8e403..7f8ffbf79cf7 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
@@ -758,6 +758,8 @@ static void otx2vf_remove(struct pci_dev *pdev)
if (vf->otx2_wq)
destroy_workqueue(vf->otx2_wq);
otx2_ptp_destroy(vf);
+ otx2_mcam_flow_del(vf);
+ otx2_shutdown_tc(vf);
otx2vf_disable_mbox_intr(vf);
otx2_detach_resources(&vf->mbox);
if (test_bit(CN10K_LMTST, &vf->hw.cap_flag))
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index e3de9a53b2d9..e3123723522e 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1570,8 +1570,8 @@ static struct page_pool *mtk_create_page_pool(struct mtk_eth *eth,
if (IS_ERR(pp))
return pp;
- err = __xdp_rxq_info_reg(xdp_q, &eth->dummy_dev, eth->rx_napi.napi_id,
- id, PAGE_SIZE);
+ err = __xdp_rxq_info_reg(xdp_q, &eth->dummy_dev, id,
+ eth->rx_napi.napi_id, PAGE_SIZE);
if (err < 0)
goto err_free_pp;
@@ -1870,7 +1870,9 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
while (done < budget) {
unsigned int pktlen, *rxdcsum;
+ bool has_hwaccel_tag = false;
struct net_device *netdev;
+ u16 vlan_proto, vlan_tci;
dma_addr_t dma_addr;
u32 hash, reason;
int mac = 0;
@@ -2010,27 +2012,29 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) {
if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
- if (trxd.rxd3 & RX_DMA_VTAG_V2)
- __vlan_hwaccel_put_tag(skb,
- htons(RX_DMA_VPID(trxd.rxd4)),
- RX_DMA_VID(trxd.rxd4));
+ if (trxd.rxd3 & RX_DMA_VTAG_V2) {
+ vlan_proto = RX_DMA_VPID(trxd.rxd4);
+ vlan_tci = RX_DMA_VID(trxd.rxd4);
+ has_hwaccel_tag = true;
+ }
} else if (trxd.rxd2 & RX_DMA_VTAG) {
- __vlan_hwaccel_put_tag(skb, htons(RX_DMA_VPID(trxd.rxd3)),
- RX_DMA_VID(trxd.rxd3));
+ vlan_proto = RX_DMA_VPID(trxd.rxd3);
+ vlan_tci = RX_DMA_VID(trxd.rxd3);
+ has_hwaccel_tag = true;
}
}
/* When using VLAN untagging in combination with DSA, the
* hardware treats the MTK special tag as a VLAN and untags it.
*/
- if (skb_vlan_tag_present(skb) && netdev_uses_dsa(netdev)) {
- unsigned int port = ntohs(skb->vlan_proto) & GENMASK(2, 0);
+ if (has_hwaccel_tag && netdev_uses_dsa(netdev)) {
+ unsigned int port = vlan_proto & GENMASK(2, 0);
if (port < ARRAY_SIZE(eth->dsa_meta) &&
eth->dsa_meta[port])
skb_dst_set_noref(skb, &eth->dsa_meta[port]->dst);
-
- __vlan_hwaccel_clear_tag(skb);
+ } else if (has_hwaccel_tag) {
+ __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vlan_tci);
}
skb_record_rx_queue(skb, 0);
@@ -3111,7 +3115,7 @@ static void mtk_gdm_config(struct mtk_eth *eth, u32 config)
val |= config;
- if (!i && eth->netdev[0] && netdev_uses_dsa(eth->netdev[0]))
+ if (eth->netdev[i] && netdev_uses_dsa(eth->netdev[i]))
val |= MTK_GDMA_SPECIAL_TAG;
mtk_w32(eth, val, MTK_GDMA_FWD_CFG(i));
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 18a50529ce7b..2d9186d32bc0 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -519,7 +519,7 @@
#define SGMII_SPEED_10 FIELD_PREP(SGMII_SPEED_MASK, 0)
#define SGMII_SPEED_100 FIELD_PREP(SGMII_SPEED_MASK, 1)
#define SGMII_SPEED_1000 FIELD_PREP(SGMII_SPEED_MASK, 2)
-#define SGMII_DUPLEX_FULL BIT(4)
+#define SGMII_DUPLEX_HALF BIT(4)
#define SGMII_IF_MODE_BIT5 BIT(5)
#define SGMII_REMOTE_FAULT_DIS BIT(8)
#define SGMII_CODE_SYNC_SET_VAL BIT(9)
@@ -1036,11 +1036,13 @@ struct mtk_soc_data {
* @regmap: The register map pointing at the range used to setup
* SGMII modes
* @ana_rgc3: The offset refers to register ANA_RGC3 related to regmap
+ * @interface: Currently configured interface mode
* @pcs: Phylink PCS structure
*/
struct mtk_pcs {
struct regmap *regmap;
u32 ana_rgc3;
+ phy_interface_t interface;
struct phylink_pcs pcs;
};
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c
index 269208a841c7..1ff024f42444 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe.c
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
@@ -615,8 +615,7 @@ mtk_foe_entry_commit_subflow(struct mtk_ppe *ppe, struct mtk_flow_entry *entry,
u32 ib1_mask = mtk_get_ib1_pkt_type_mask(ppe->eth) | MTK_FOE_IB1_UDP;
int type;
- flow_info = kzalloc(offsetof(struct mtk_flow_entry, l2_data.end),
- GFP_ATOMIC);
+ flow_info = kzalloc(sizeof(*flow_info), GFP_ATOMIC);
if (!flow_info)
return;
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h
index ea64fac1d425..b5e432031340 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe.h
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.h
@@ -279,7 +279,6 @@ struct mtk_flow_entry {
struct {
struct mtk_flow_entry *base_flow;
struct hlist_node list;
- struct {} end;
} l2_data;
};
struct rhash_head node;
diff --git a/drivers/net/ethernet/mediatek/mtk_sgmii.c b/drivers/net/ethernet/mediatek/mtk_sgmii.c
index 5c286f2c9418..bb00de1003ac 100644
--- a/drivers/net/ethernet/mediatek/mtk_sgmii.c
+++ b/drivers/net/ethernet/mediatek/mtk_sgmii.c
@@ -43,11 +43,6 @@ static int mtk_pcs_config(struct phylink_pcs *pcs, unsigned int mode,
int advertise, link_timer;
bool changed, use_an;
- if (interface == PHY_INTERFACE_MODE_2500BASEX)
- rgc3 = RG_PHY_SPEED_3_125G;
- else
- rgc3 = 0;
-
advertise = phylink_mii_c22_pcs_encode_advertisement(interface,
advertising);
if (advertise < 0)
@@ -88,9 +83,22 @@ static int mtk_pcs_config(struct phylink_pcs *pcs, unsigned int mode,
bmcr = 0;
}
- /* Configure the underlying interface speed */
- regmap_update_bits(mpcs->regmap, mpcs->ana_rgc3,
- RG_PHY_SPEED_3_125G, rgc3);
+ if (mpcs->interface != interface) {
+ /* PHYA power down */
+ regmap_update_bits(mpcs->regmap, SGMSYS_QPHY_PWR_STATE_CTRL,
+ SGMII_PHYA_PWD, SGMII_PHYA_PWD);
+
+ if (interface == PHY_INTERFACE_MODE_2500BASEX)
+ rgc3 = RG_PHY_SPEED_3_125G;
+ else
+ rgc3 = 0;
+
+ /* Configure the underlying interface speed */
+ regmap_update_bits(mpcs->regmap, mpcs->ana_rgc3,
+ RG_PHY_SPEED_3_125G, rgc3);
+
+ mpcs->interface = interface;
+ }
/* Update the advertisement, noting whether it has changed */
regmap_update_bits_check(mpcs->regmap, SGMSYS_PCS_ADVERTISE,
@@ -108,9 +116,17 @@ static int mtk_pcs_config(struct phylink_pcs *pcs, unsigned int mode,
regmap_update_bits(mpcs->regmap, SGMSYS_PCS_CONTROL_1,
SGMII_AN_RESTART | SGMII_AN_ENABLE, bmcr);
- /* Release PHYA power down state */
- regmap_update_bits(mpcs->regmap, SGMSYS_QPHY_PWR_STATE_CTRL,
- SGMII_PHYA_PWD, 0);
+ /* Release PHYA power down state
+ * Only removing bit SGMII_PHYA_PWD isn't enough.
+ * There are cases when the SGMII_PHYA_PWD register contains 0x9 which
+ * prevents SGMII from working. The SGMII still shows link but no traffic
+ * can flow. Writing 0x0 to the PHYA_PWD register fix the issue. 0x0 was
+ * taken from a good working state of the SGMII interface.
+ * Unknown how much the QPHY needs but it is racy without a sleep.
+ * Tested on mt7622 & mt7986.
+ */
+ usleep_range(50, 100);
+ regmap_write(mpcs->regmap, SGMSYS_QPHY_PWR_STATE_CTRL, 0);
return changed;
}
@@ -138,11 +154,11 @@ static void mtk_pcs_link_up(struct phylink_pcs *pcs, unsigned int mode,
else
sgm_mode = SGMII_SPEED_1000;
- if (duplex == DUPLEX_FULL)
- sgm_mode |= SGMII_DUPLEX_FULL;
+ if (duplex != DUPLEX_FULL)
+ sgm_mode |= SGMII_DUPLEX_HALF;
regmap_update_bits(mpcs->regmap, SGMSYS_SGMII_MODE,
- SGMII_DUPLEX_FULL | SGMII_SPEED_MASK,
+ SGMII_DUPLEX_HALF | SGMII_SPEED_MASK,
sgm_mode);
}
}
@@ -171,6 +187,8 @@ int mtk_sgmii_init(struct mtk_sgmii *ss, struct device_node *r, u32 ana_rgc3)
return PTR_ERR(ss->pcs[i].regmap);
ss->pcs[i].pcs.ops = &mtk_pcs_ops;
+ ss->pcs[i].pcs.poll = true;
+ ss->pcs[i].interface = PHY_INTERFACE_MODE_NA;
}
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index d3ca745d107d..c837103a9ee3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -2176,15 +2176,9 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
return -EINVAL;
}
- cmd->stats = kvcalloc(MLX5_CMD_OP_MAX, sizeof(*cmd->stats), GFP_KERNEL);
- if (!cmd->stats)
- return -ENOMEM;
-
cmd->pool = dma_pool_create("mlx5_cmd", mlx5_core_dma_dev(dev), size, align, 0);
- if (!cmd->pool) {
- err = -ENOMEM;
- goto dma_pool_err;
- }
+ if (!cmd->pool)
+ return -ENOMEM;
err = alloc_cmd_page(dev, cmd);
if (err)
@@ -2268,8 +2262,6 @@ err_free_page:
err_free_pool:
dma_pool_destroy(cmd->pool);
-dma_pool_err:
- kvfree(cmd->stats);
return err;
}
@@ -2282,7 +2274,6 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev)
destroy_msg_cache(dev);
free_cmd_page(dev, cmd);
dma_pool_destroy(cmd->pool);
- kvfree(cmd->stats);
}
void mlx5_cmd_set_state(struct mlx5_core_dev *dev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
index 3e232a65a0c3..bb95b40d25eb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -245,8 +245,9 @@ void mlx5_pages_debugfs_init(struct mlx5_core_dev *dev)
pages = dev->priv.dbg.pages_debugfs;
debugfs_create_u32("fw_pages_total", 0400, pages, &dev->priv.fw_pages);
- debugfs_create_u32("fw_pages_vfs", 0400, pages, &dev->priv.vfs_pages);
- debugfs_create_u32("fw_pages_host_pf", 0400, pages, &dev->priv.host_pf_pages);
+ debugfs_create_u32("fw_pages_vfs", 0400, pages, &dev->priv.page_counters[MLX5_VF]);
+ debugfs_create_u32("fw_pages_sfs", 0400, pages, &dev->priv.page_counters[MLX5_SF]);
+ debugfs_create_u32("fw_pages_host_pf", 0400, pages, &dev->priv.page_counters[MLX5_HOST_PF]);
debugfs_create_u32("fw_pages_alloc_failed", 0400, pages, &dev->priv.fw_pages_alloc_failed);
debugfs_create_u32("fw_pages_give_dropped", 0400, pages, &dev->priv.give_pages_dropped);
debugfs_create_u32("fw_pages_reclaim_discard", 0400, pages,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index ddb197970c22..5bd83c0275f8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -468,7 +468,7 @@ static int mlx5_devlink_enable_roce_validate(struct devlink *devlink, u32 id,
bool new_state = val.vbool;
if (new_state && !MLX5_CAP_GEN(dev, roce) &&
- !MLX5_CAP_GEN(dev, roce_rw_supported)) {
+ !(MLX5_CAP_GEN(dev, roce_rw_supported) && MLX5_CAP_GEN_MAX(dev, roce))) {
NL_SET_ERR_MSG_MOD(extack, "Device doesn't support RoCE");
return -EOPNOTSUPP;
}
@@ -563,7 +563,7 @@ static int mlx5_devlink_eq_depth_validate(struct devlink *devlink, u32 id,
union devlink_param_value val,
struct netlink_ext_ack *extack)
{
- return (val.vu16 >= 64 && val.vu16 <= 4096) ? 0 : -EINVAL;
+ return (val.vu32 >= 64 && val.vu32 <= 4096) ? 0 : -EINVAL;
}
static const struct devlink_param mlx5_devlink_params[] = {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
index 21831386b26e..5b05b884b5fb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
@@ -64,6 +64,7 @@ static int mlx5_query_mtrc_caps(struct mlx5_fw_tracer *tracer)
MLX5_GET(mtrc_cap, out, num_string_trace);
tracer->str_db.num_string_db = MLX5_GET(mtrc_cap, out, num_string_db);
tracer->owner = !!MLX5_GET(mtrc_cap, out, trace_owner);
+ tracer->str_db.loaded = false;
for (i = 0; i < tracer->str_db.num_string_db; i++) {
mtrc_cap_sp = MLX5_ADDR_OF(mtrc_cap, out, string_db_param[i]);
@@ -756,6 +757,7 @@ static int mlx5_fw_tracer_set_mtrc_conf(struct mlx5_fw_tracer *tracer)
if (err)
mlx5_core_warn(dev, "FWTracer: Failed to set tracer configurations %d\n", err);
+ tracer->buff.consumer_index = 0;
return err;
}
@@ -820,7 +822,6 @@ static void mlx5_fw_tracer_ownership_change(struct work_struct *work)
mlx5_core_dbg(tracer->dev, "FWTracer: ownership changed, current=(%d)\n", tracer->owner);
if (tracer->owner) {
tracer->owner = false;
- tracer->buff.consumer_index = 0;
return;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
index 464eb3a18450..cdc87ecae5d3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
@@ -87,7 +87,7 @@ void mlx5_ec_cleanup(struct mlx5_core_dev *dev)
mlx5_host_pf_cleanup(dev);
- err = mlx5_wait_for_pages(dev, &dev->priv.host_pf_pages);
+ err = mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_HOST_PF]);
if (err)
mlx5_core_warn(dev, "Timeout reclaiming external host PF pages err(%d)\n", err);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/htb.c b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.c
index 6dac76fa58a3..09d441ecb9f6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/htb.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.c
@@ -637,7 +637,7 @@ mlx5e_htb_update_children(struct mlx5e_htb *htb, struct mlx5e_qos_node *node,
if (child->bw_share == old_bw_share)
continue;
- err_one = mlx5_qos_update_node(htb->mdev, child->hw_id, child->bw_share,
+ err_one = mlx5_qos_update_node(htb->mdev, child->bw_share,
child->max_average_bw, child->hw_id);
if (!err && err_one) {
err = err_one;
@@ -671,7 +671,7 @@ mlx5e_htb_node_modify(struct mlx5e_htb *htb, u16 classid, u64 rate, u64 ceil,
mlx5e_htb_convert_rate(htb, rate, node->parent, &bw_share);
mlx5e_htb_convert_ceil(htb, ceil, &max_average_bw);
- err = mlx5_qos_update_node(htb->mdev, node->parent->hw_id, bw_share,
+ err = mlx5_qos_update_node(htb->mdev, bw_share,
max_average_bw, node->hw_id);
if (err) {
NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a node.");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index 585bdc8383ee..4ad19c981294 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -578,7 +578,6 @@ int mlx5e_mpwrq_validate_xsk(struct mlx5_core_dev *mdev, struct mlx5e_params *pa
{
enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
- bool unaligned = xsk ? xsk->unaligned : false;
u16 max_mtu_pkts;
if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, umr_mode))
@@ -591,7 +590,7 @@ int mlx5e_mpwrq_validate_xsk(struct mlx5_core_dev *mdev, struct mlx5e_params *pa
* needed number of WQEs exceeds the maximum.
*/
max_mtu_pkts = min_t(u8, MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE,
- mlx5e_mpwrq_max_log_rq_pkts(mdev, page_shift, unaligned));
+ mlx5e_mpwrq_max_log_rq_pkts(mdev, page_shift, xsk->unaligned));
if (params->log_rq_mtu_frames > max_mtu_pkts) {
mlx5_core_err(mdev, "Current RQ length %d is too big for XSK with given frame size %u\n",
1 << params->log_rq_mtu_frames, xsk->chunk_size);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
index 8099a21e674c..ce85b48d327d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
@@ -438,10 +438,6 @@ static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb,
switch (event) {
case SWITCHDEV_FDB_ADD_TO_BRIDGE:
- /* only handle the event on native eswtich of representor */
- if (!mlx5_esw_bridge_is_local(dev, rep, esw))
- break;
-
fdb_info = container_of(info,
struct switchdev_notifier_fdb_info,
info);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
index 5f6f95ad6888..1ae15b8536a8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -459,7 +459,11 @@ static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter,
goto unlock;
for (i = 0; i < priv->channels.num; i++) {
- struct mlx5e_rq *rq = &priv->channels.c[i]->rq;
+ struct mlx5e_channel *c = priv->channels.c[i];
+ struct mlx5e_rq *rq;
+
+ rq = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state) ?
+ &c->xskrq : &c->rq;
err = mlx5e_rx_reporter_build_diagnose_output(rq, fmsg);
if (err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c
index 512d43148922..c4378afdec09 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c
@@ -34,12 +34,6 @@ static int police_act_validate(const struct flow_action_entry *act,
return -EOPNOTSUPP;
}
- if (act->police.rate_pkt_ps) {
- NL_SET_ERR_MSG_MOD(extack,
- "QoS offload not support packets per second");
- return -EOPNOTSUPP;
- }
-
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c
index 8d7d761482d2..50b60fd00946 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c
@@ -127,6 +127,7 @@ mlx5e_post_meter_add_rule(struct mlx5e_priv *priv,
attr->counter = act_counter;
attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT;
+ attr->inner_match_level = MLX5_MATCH_NONE;
attr->outer_match_level = MLX5_MATCH_NONE;
attr->chain = 0;
attr->prio = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c
index 1cbd2eb9d04f..f2c2c752bd1c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c
@@ -477,7 +477,6 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
struct mlx5e_sample_flow *sample_flow;
struct mlx5e_sample_attr *sample_attr;
struct mlx5_flow_attr *pre_attr;
- u32 tunnel_id = attr->tunnel_id;
struct mlx5_eswitch *esw;
u32 default_tbl_id;
u32 obj_id;
@@ -522,7 +521,7 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
restore_obj.sample.group_id = sample_attr->group_num;
restore_obj.sample.rate = sample_attr->rate;
restore_obj.sample.trunc_size = sample_attr->trunc_size;
- restore_obj.sample.tunnel_id = tunnel_id;
+ restore_obj.sample.tunnel_id = attr->tunnel_id;
err = mapping_add(esw->offloads.reg_c0_obj_pool, &restore_obj, &obj_id);
if (err)
goto err_obj_id;
@@ -548,7 +547,7 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
/* For decap action, do decap in the original flow table instead of the
* default flow table.
*/
- if (tunnel_id)
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
pre_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
pre_attr->modify_hdr = sample_flow->restore->modify_hdr;
pre_attr->flags = MLX5_ATTR_FLAG_SAMPLE;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index a69849e0deed..313df8232db7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -2103,14 +2103,9 @@ out_err:
static void
mlx5_ct_tc_create_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
{
- bool is_fdb = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB;
struct mlx5_tc_ct_debugfs *ct_dbgfs = &ct_priv->debugfs;
- char dirname[16] = {};
- if (sscanf(dirname, "ct_%s", is_fdb ? "fdb" : "nic") < 0)
- return;
-
- ct_dbgfs->root = debugfs_create_dir(dirname, mlx5_debugfs_get_dev_root(ct_priv->dev));
+ ct_dbgfs->root = debugfs_create_dir("ct", mlx5_debugfs_get_dev_root(ct_priv->dev));
debugfs_create_atomic_t("offloaded", 0400, ct_dbgfs->root,
&ct_dbgfs->stats.offloaded);
debugfs_create_atomic_t("rx_dropped", 0400, ct_dbgfs->root,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
index ff73d25bc6eb..2aaf8ab857b8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
@@ -222,7 +222,7 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
int err;
list_for_each_entry(flow, flow_list, tmp_list) {
- if (!mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, SLOW))
+ if (!mlx5e_is_offloaded_flow(flow))
continue;
attr = mlx5e_tc_get_encap_attr(flow);
@@ -231,6 +231,13 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
+ /* Clear pkt_reformat before checking slow path flag. Because
+ * in next iteration, the same flow is already set slow path
+ * flag, but still need to clear the pkt_reformat.
+ */
+ if (flow_flag_test(flow, SLOW))
+ continue;
+
/* update from encap rule to slow path rule */
spec = &flow->attr->parse_attr->spec;
rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c
index f5b26f5a7de4..054d80c4e65c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c
@@ -273,6 +273,11 @@ static int mlx5e_tc_tun_parse_geneve_options(struct mlx5e_priv *priv,
geneve_tlv_option_0_data, be32_to_cpu(opt_data_key));
MLX5_SET(fte_match_set_misc3, misc_3_c,
geneve_tlv_option_0_data, be32_to_cpu(opt_data_mask));
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
+ ft_field_support.geneve_tlv_option_0_exist)) {
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, geneve_tlv_option_0_exist);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc_v, geneve_tlv_option_0_exist);
+ }
spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c
index fd07c4cbfd1d..1f62c702b625 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c
@@ -88,6 +88,8 @@ static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[],
struct udphdr *udp = (struct udphdr *)(buf);
struct vxlanhdr *vxh;
+ if (tun_key->tun_flags & TUNNEL_VXLAN_OPT)
+ return -EOPNOTSUPP;
vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
*ip_proto = IPPROTO_UDP;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
index a92e19c4c499..8bed9c361075 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -122,11 +122,8 @@ struct mlx5e_ipsec_aso {
u8 ctx[MLX5_ST_SZ_BYTES(ipsec_aso)];
dma_addr_t dma_addr;
struct mlx5_aso *aso;
- /* IPsec ASO caches data on every query call,
- * so in nested calls, we can use this boolean to save
- * recursive calls to mlx5e_ipsec_aso_query()
- */
- u8 use_cache : 1;
+ /* Protect ASO WQ access, as it is global to whole IPsec */
+ spinlock_t lock;
};
struct mlx5e_ipsec {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
index 8e3614218fc4..2461462b7b99 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
@@ -320,7 +320,6 @@ static void mlx5e_ipsec_handle_event(struct work_struct *_work)
if (ret)
goto unlock;
- aso->use_cache = true;
if (attrs->esn_trigger &&
!MLX5_GET(ipsec_aso, aso->ctx, esn_event_arm)) {
u32 mode_param = MLX5_GET(ipsec_aso, aso->ctx, mode_parameter);
@@ -333,7 +332,6 @@ static void mlx5e_ipsec_handle_event(struct work_struct *_work)
!MLX5_GET(ipsec_aso, aso->ctx, hard_lft_arm) ||
!MLX5_GET(ipsec_aso, aso->ctx, remove_flow_enable))
xfrm_state_check_expire(sa_entry->x);
- aso->use_cache = false;
unlock:
spin_unlock(&sa_entry->x->lock);
@@ -398,6 +396,7 @@ int mlx5e_ipsec_aso_init(struct mlx5e_ipsec *ipsec)
goto err_aso_create;
}
+ spin_lock_init(&aso->lock);
ipsec->nb.notifier_call = mlx5e_ipsec_event;
mlx5_notifier_register(mdev, &ipsec->nb);
@@ -456,13 +455,12 @@ int mlx5e_ipsec_aso_query(struct mlx5e_ipsec_sa_entry *sa_entry,
struct mlx5e_hw_objs *res;
struct mlx5_aso_wqe *wqe;
u8 ds_cnt;
+ int ret;
lockdep_assert_held(&sa_entry->x->lock);
- if (aso->use_cache)
- return 0;
-
res = &mdev->mlx5e_res.hw_objs;
+ spin_lock_bh(&aso->lock);
memset(aso->ctx, 0, sizeof(aso->ctx));
wqe = mlx5_aso_get_wqe(aso->aso);
ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS);
@@ -477,7 +475,9 @@ int mlx5e_ipsec_aso_query(struct mlx5e_ipsec_sa_entry *sa_entry,
mlx5e_ipsec_aso_copy(ctrl, data);
mlx5_aso_post_wqe(aso->aso, false, &wqe->ctrl);
- return mlx5_aso_poll_cq(aso->aso, false);
+ ret = mlx5_aso_poll_cq(aso->aso, false);
+ spin_unlock_bh(&aso->lock);
+ return ret;
}
void mlx5e_ipsec_aso_update_curlft(struct mlx5e_ipsec_sa_entry *sa_entry,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
index 9369a580743e..7f6b940830b3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
@@ -62,6 +62,7 @@ struct mlx5e_macsec_sa {
u32 enc_key_id;
u32 next_pn;
sci_t sci;
+ ssci_t ssci;
salt_t salt;
struct rhash_head hash;
@@ -358,7 +359,6 @@ static int mlx5e_macsec_init_sa(struct macsec_context *ctx,
struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5_macsec_obj_attrs obj_attrs;
union mlx5e_macsec_rule *macsec_rule;
- struct macsec_key *key;
int err;
obj_attrs.next_pn = sa->next_pn;
@@ -368,13 +368,9 @@ static int mlx5e_macsec_init_sa(struct macsec_context *ctx,
obj_attrs.aso_pdn = macsec->aso.pdn;
obj_attrs.epn_state = sa->epn_state;
- key = (is_tx) ? &ctx->sa.tx_sa->key : &ctx->sa.rx_sa->key;
-
if (sa->epn_state.epn_enabled) {
- obj_attrs.ssci = (is_tx) ? cpu_to_be32((__force u32)ctx->sa.tx_sa->ssci) :
- cpu_to_be32((__force u32)ctx->sa.rx_sa->ssci);
-
- memcpy(&obj_attrs.salt, &key->salt, sizeof(key->salt));
+ obj_attrs.ssci = cpu_to_be32((__force u32)sa->ssci);
+ memcpy(&obj_attrs.salt, &sa->salt, sizeof(sa->salt));
}
obj_attrs.replay_window = ctx->secy->replay_window;
@@ -499,10 +495,11 @@ mlx5e_macsec_get_macsec_device_context(const struct mlx5e_macsec *macsec,
}
static void update_macsec_epn(struct mlx5e_macsec_sa *sa, const struct macsec_key *key,
- const pn_t *next_pn_halves)
+ const pn_t *next_pn_halves, ssci_t ssci)
{
struct mlx5e_macsec_epn_state *epn_state = &sa->epn_state;
+ sa->ssci = ssci;
sa->salt = key->salt;
epn_state->epn_enabled = 1;
epn_state->epn_msb = next_pn_halves->upper;
@@ -550,7 +547,8 @@ static int mlx5e_macsec_add_txsa(struct macsec_context *ctx)
tx_sa->assoc_num = assoc_num;
if (secy->xpn)
- update_macsec_epn(tx_sa, &ctx_tx_sa->key, &ctx_tx_sa->next_pn_halves);
+ update_macsec_epn(tx_sa, &ctx_tx_sa->key, &ctx_tx_sa->next_pn_halves,
+ ctx_tx_sa->ssci);
err = mlx5_create_encryption_key(mdev, ctx->sa.key, secy->key_len,
MLX5_ACCEL_OBJ_MACSEC_KEY,
@@ -945,7 +943,8 @@ static int mlx5e_macsec_add_rxsa(struct macsec_context *ctx)
rx_sa->fs_id = rx_sc->sc_xarray_element->fs_id;
if (ctx->secy->xpn)
- update_macsec_epn(rx_sa, &ctx_rx_sa->key, &ctx_rx_sa->next_pn_halves);
+ update_macsec_epn(rx_sa, &ctx_rx_sa->key, &ctx_rx_sa->next_pn_halves,
+ ctx_rx_sa->ssci);
err = mlx5_create_encryption_key(mdev, ctx->sa.key, ctx->secy->key_len,
MLX5_ACCEL_OBJ_MACSEC_KEY,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 1892ccb889b3..7cd36f4ac3ef 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -443,7 +443,7 @@ void mlx5e_enable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc)
void mlx5e_disable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc)
{
- if (fs->vlan->cvlan_filter_disabled)
+ if (!fs->vlan || fs->vlan->cvlan_filter_disabled)
return;
fs->vlan->cvlan_filter_disabled = true;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 8d36e2de53a9..6c24f33a5ea5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -591,7 +591,8 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param
rq->ix = c->ix;
rq->channel = c;
rq->mdev = mdev;
- rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ rq->hw_mtu =
+ MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN * !params->scatter_fcs_en;
rq->xdpsq = &c->rq_xdpsq;
rq->stats = &c->priv->channel_stats[c->ix]->rq;
rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev);
@@ -1014,35 +1015,6 @@ int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state)
return mlx5e_rq_to_ready(rq, curr_state);
}
-static int mlx5e_modify_rq_scatter_fcs(struct mlx5e_rq *rq, bool enable)
-{
- struct mlx5_core_dev *mdev = rq->mdev;
-
- void *in;
- void *rqc;
- int inlen;
- int err;
-
- inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
- in = kvzalloc(inlen, GFP_KERNEL);
- if (!in)
- return -ENOMEM;
-
- rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
-
- MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY);
- MLX5_SET64(modify_rq_in, in, modify_bitmask,
- MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_SCATTER_FCS);
- MLX5_SET(rqc, rqc, scatter_fcs, enable);
- MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY);
-
- err = mlx5_core_modify_rq(mdev, rq->rqn, in);
-
- kvfree(in);
-
- return err;
-}
-
static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd)
{
struct mlx5_core_dev *mdev = rq->mdev;
@@ -1305,7 +1277,7 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
sq->channel = c;
sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map;
sq->min_inline_mode = params->tx_min_inline_mode;
- sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN;
sq->xsk_pool = xsk_pool;
sq->stats = sq->xsk_pool ?
@@ -3314,20 +3286,6 @@ static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
mlx5e_destroy_tises(priv);
}
-static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable)
-{
- int err = 0;
- int i;
-
- for (i = 0; i < chs->num; i++) {
- err = mlx5e_modify_rq_scatter_fcs(&chs->c[i]->rq, enable);
- if (err)
- return err;
- }
-
- return 0;
-}
-
static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd)
{
int err;
@@ -3903,41 +3861,27 @@ static int mlx5e_set_rx_port_ts(struct mlx5_core_dev *mdev, bool enable)
return mlx5_set_ports_check(mdev, in, sizeof(in));
}
+static int mlx5e_set_rx_port_ts_wrap(struct mlx5e_priv *priv, void *ctx)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ bool enable = *(bool *)ctx;
+
+ return mlx5e_set_rx_port_ts(mdev, enable);
+}
+
static int set_feature_rx_fcs(struct net_device *netdev, bool enable)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5e_channels *chs = &priv->channels;
- struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_params new_params;
int err;
mutex_lock(&priv->state_lock);
- if (enable) {
- err = mlx5e_set_rx_port_ts(mdev, false);
- if (err)
- goto out;
-
- chs->params.scatter_fcs_en = true;
- err = mlx5e_modify_channels_scatter_fcs(chs, true);
- if (err) {
- chs->params.scatter_fcs_en = false;
- mlx5e_set_rx_port_ts(mdev, true);
- }
- } else {
- chs->params.scatter_fcs_en = false;
- err = mlx5e_modify_channels_scatter_fcs(chs, false);
- if (err) {
- chs->params.scatter_fcs_en = true;
- goto out;
- }
- err = mlx5e_set_rx_port_ts(mdev, true);
- if (err) {
- mlx5_core_warn(mdev, "Failed to set RX port timestamp %d\n", err);
- err = 0;
- }
- }
-
-out:
+ new_params = chs->params;
+ new_params.scatter_fcs_en = enable;
+ err = mlx5e_safe_switch_params(priv, &new_params, mlx5e_set_rx_port_ts_wrap,
+ &new_params.scatter_fcs_en, true);
mutex_unlock(&priv->state_lock);
return err;
}
@@ -4074,6 +4018,10 @@ static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev
if (netdev->features & NETIF_F_GRO_HW)
netdev_warn(netdev, "Disabling HW_GRO, not supported in switchdev mode\n");
+ features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
+ if (netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
+ netdev_warn(netdev, "Disabling HW_VLAN CTAG FILTERING, not supported in switchdev mode\n");
+
return features;
}
@@ -4084,6 +4032,9 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
struct mlx5e_vlan_table *vlan;
struct mlx5e_params *params;
+ if (!netif_device_present(netdev))
+ return features;
+
vlan = mlx5e_fs_get_vlan(priv->fs);
mutex_lock(&priv->state_lock);
params = &priv->channels.params;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 75b9e1528fd2..7d90e5b72854 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -191,7 +191,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vport_rep)
if (err) {
netdev_warn(priv->netdev, "vport %d error %d reading stats\n",
rep->vport, err);
- return;
+ goto out;
}
#define MLX5_GET_CTR(p, x) \
@@ -241,6 +241,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vport_rep)
rep_stats->tx_vport_rdma_multicast_bytes =
MLX5_GET_CTR(out, received_ib_multicast.octets);
+out:
kvfree(out);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index c8820ab22169..3df455f6b168 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -2419,7 +2419,7 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
priv = mlx5i_epriv(netdev);
tstamp = &priv->tstamp;
- stats = rq->stats;
+ stats = &priv->channel_stats[rq->ix]->rq;
flags_rqpn = be32_to_cpu(cqe->flags_rqpn);
g = (flags_rqpn >> 28) & 3;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 9af2aa2922f5..243d5d7750be 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -166,6 +166,7 @@ struct mlx5_fs_chains *mlx5e_nic_chains(struct mlx5e_tc_table *tc)
* it's different than the ht->mutex here.
*/
static struct lock_class_key tc_ht_lock_key;
+static struct lock_class_key tc_ht_wq_key;
static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);
static void free_flow_post_acts(struct mlx5e_tc_flow *flow);
@@ -1301,7 +1302,6 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
- mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
if (err)
return err;
}
@@ -1359,8 +1359,10 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
}
mutex_unlock(&tc->t_lock);
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
+ mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
mlx5e_detach_mod_hdr(priv, flow);
+ }
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
mlx5_fc_destroy(priv->mdev, attr->counter);
@@ -5181,6 +5183,7 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
return err;
lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key);
+ lockdep_init_map(&tc->ht.run_work.lockdep_map, "tc_ht_wq_key", &tc_ht_wq_key, 0);
mapping_id = mlx5_query_nic_system_image_guid(dev);
@@ -5287,6 +5290,7 @@ int mlx5e_tc_ht_init(struct rhashtable *tc_ht)
return err;
lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key);
+ lockdep_init_map(&tc_ht->run_work.lockdep_map, "tc_ht_wq_key", &tc_ht_wq_key, 0);
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
index 60a73990017c..6b4c9ffad95b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
@@ -67,6 +67,7 @@ static void esw_acl_egress_lgcy_groups_destroy(struct mlx5_vport *vport)
int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw,
struct mlx5_vport *vport)
{
+ bool vst_mode_steering = esw_vst_mode_is_steering(esw);
struct mlx5_flow_destination drop_ctr_dst = {};
struct mlx5_flow_destination *dst = NULL;
struct mlx5_fc *drop_counter = NULL;
@@ -77,6 +78,7 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw,
*/
int table_size = 2;
int dest_num = 0;
+ int actions_flag;
int err = 0;
if (vport->egress.legacy.drop_counter) {
@@ -119,8 +121,11 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw,
vport->vport, vport->info.vlan, vport->info.qos);
/* Allowed vlan rule */
+ actions_flag = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ if (vst_mode_steering)
+ actions_flag |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
err = esw_egress_acl_vlan_create(esw, vport, NULL, vport->info.vlan,
- MLX5_FLOW_CONTEXT_ACTION_ALLOW);
+ actions_flag);
if (err)
goto out;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
index b1a5199260f6..093ed86a0acd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
@@ -139,11 +139,14 @@ static void esw_acl_ingress_lgcy_groups_destroy(struct mlx5_vport *vport)
int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw,
struct mlx5_vport *vport)
{
+ bool vst_mode_steering = esw_vst_mode_is_steering(esw);
struct mlx5_flow_destination drop_ctr_dst = {};
struct mlx5_flow_destination *dst = NULL;
struct mlx5_flow_act flow_act = {};
struct mlx5_flow_spec *spec = NULL;
struct mlx5_fc *counter = NULL;
+ bool vst_check_cvlan = false;
+ bool vst_push_cvlan = false;
/* The ingress acl table contains 4 groups
* (2 active rules at the same time -
* 1 allow rule from one of the first 3 groups.
@@ -203,7 +206,26 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw,
goto out;
}
- if (vport->info.vlan || vport->info.qos)
+ if ((vport->info.vlan || vport->info.qos)) {
+ if (vst_mode_steering)
+ vst_push_cvlan = true;
+ else if (!MLX5_CAP_ESW(esw->dev, vport_cvlan_insert_always))
+ vst_check_cvlan = true;
+ }
+
+ if (vst_check_cvlan || vport->info.spoofchk)
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+
+ /* Create ingress allow rule */
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ if (vst_push_cvlan) {
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
+ flow_act.vlan[0].prio = vport->info.qos;
+ flow_act.vlan[0].vid = vport->info.vlan;
+ flow_act.vlan[0].ethtype = ETH_P_8021Q;
+ }
+
+ if (vst_check_cvlan)
MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
outer_headers.cvlan_tag);
@@ -218,9 +240,6 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw,
ether_addr_copy(smac_v, vport->info.mac);
}
- /* Create ingress allow rule */
- spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
- flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
vport->ingress.allow_rule = mlx5_add_flow_rules(vport->ingress.acl, spec,
&flow_act, NULL, 0);
if (IS_ERR(vport->ingress.allow_rule)) {
@@ -232,6 +251,9 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw,
goto out;
}
+ if (!vst_check_cvlan && !vport->info.spoofchk)
+ goto out;
+
memset(&flow_act, 0, sizeof(flow_act));
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
/* Attach drop flow counter */
@@ -257,7 +279,8 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw,
return 0;
out:
- esw_acl_ingress_lgcy_cleanup(esw, vport);
+ if (err)
+ esw_acl_ingress_lgcy_cleanup(esw, vport);
kvfree(spec);
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
index b176648d1343..3cdcb0e0b20f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
@@ -1715,7 +1715,7 @@ void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16
struct mlx5_esw_bridge *bridge;
port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
- if (!port || port->flags & MLX5_ESW_BRIDGE_PORT_FLAG_PEER)
+ if (!port)
return;
bridge = port->bridge;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
index 4f8a24d84a86..75015d370922 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
@@ -22,15 +22,13 @@ struct mlx5_esw_rate_group {
};
static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx,
- u32 parent_ix, u32 tsar_ix,
- u32 max_rate, u32 bw_share)
+ u32 tsar_ix, u32 max_rate, u32 bw_share)
{
u32 bitmask = 0;
if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
return -EOPNOTSUPP;
- MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_ix);
MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
@@ -51,7 +49,7 @@ static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_g
int err;
err = esw_qos_tsar_config(dev, sched_ctx,
- esw->qos.root_tsar_ix, group->tsar_ix,
+ group->tsar_ix,
max_rate, bw_share);
if (err)
NL_SET_ERR_MSG_MOD(extack, "E-Switch modify group TSAR element failed");
@@ -67,23 +65,13 @@ static int esw_qos_vport_config(struct mlx5_eswitch *esw,
struct netlink_ext_ack *extack)
{
u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
- struct mlx5_esw_rate_group *group = vport->qos.group;
struct mlx5_core_dev *dev = esw->dev;
- u32 parent_tsar_ix;
- void *vport_elem;
int err;
if (!vport->qos.enabled)
return -EIO;
- parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
- MLX5_SET(scheduling_context, sched_ctx, element_type,
- SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
- vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx,
- element_attributes);
- MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
-
- err = esw_qos_tsar_config(dev, sched_ctx, parent_tsar_ix, vport->qos.esw_tsar_ix,
+ err = esw_qos_tsar_config(dev, sched_ctx, vport->qos.esw_tsar_ix,
max_rate, bw_share);
if (err) {
esw_warn(esw->dev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 527e4bffda8d..9daf55e90367 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -161,10 +161,17 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport,
esw_vport_context.vport_cvlan_strip, 1);
if (set_flags & SET_VLAN_INSERT) {
- /* insert only if no vlan in packet */
- MLX5_SET(modify_esw_vport_context_in, in,
- esw_vport_context.vport_cvlan_insert, 1);
-
+ if (MLX5_CAP_ESW(dev, vport_cvlan_insert_always)) {
+ /* insert either if vlan exist in packet or not */
+ MLX5_SET(modify_esw_vport_context_in, in,
+ esw_vport_context.vport_cvlan_insert,
+ MLX5_VPORT_CVLAN_INSERT_ALWAYS);
+ } else {
+ /* insert only if no vlan in packet */
+ MLX5_SET(modify_esw_vport_context_in, in,
+ esw_vport_context.vport_cvlan_insert,
+ MLX5_VPORT_CVLAN_INSERT_WHEN_NO_CVLAN);
+ }
MLX5_SET(modify_esw_vport_context_in, in,
esw_vport_context.cvlan_pcp, qos);
MLX5_SET(modify_esw_vport_context_in, in,
@@ -809,6 +816,7 @@ out_free:
static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
{
+ bool vst_mode_steering = esw_vst_mode_is_steering(esw);
u16 vport_num = vport->vport;
int flags;
int err;
@@ -839,8 +847,9 @@ static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
flags = (vport->info.vlan || vport->info.qos) ?
SET_VLAN_STRIP | SET_VLAN_INSERT : 0;
- modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan,
- vport->info.qos, flags);
+ if (esw->mode == MLX5_ESWITCH_OFFLOADS || !vst_mode_steering)
+ modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan,
+ vport->info.qos, flags);
return 0;
@@ -1455,6 +1464,7 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw)
mlx5_lag_disable_change(esw->dev);
down_write(&esw->mode_lock);
mlx5_eswitch_disable_locked(esw);
+ esw->mode = MLX5_ESWITCH_LEGACY;
up_write(&esw->mode_lock);
mlx5_lag_enable_change(esw->dev);
}
@@ -1848,6 +1858,7 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
u16 vport, u16 vlan, u8 qos, u8 set_flags)
{
struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
+ bool vst_mode_steering = esw_vst_mode_is_steering(esw);
int err = 0;
if (IS_ERR(evport))
@@ -1855,9 +1866,11 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
if (vlan > 4095 || qos > 7)
return -EINVAL;
- err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags);
- if (err)
- return err;
+ if (esw->mode == MLX5_ESWITCH_OFFLOADS || !vst_mode_steering) {
+ err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags);
+ if (err)
+ return err;
+ }
evport->info.vlan = vlan;
evport->info.qos = qos;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 5a85a5d32be7..92644fbb5081 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -527,6 +527,12 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
u16 vport, u16 vlan, u8 qos, u8 set_flags);
+static inline bool esw_vst_mode_is_steering(struct mlx5_eswitch *esw)
+{
+ return (MLX5_CAP_ESW_EGRESS_ACL(esw->dev, pop_vlan) &&
+ MLX5_CAP_ESW_INGRESS_ACL(esw->dev, push_vlan));
+}
+
static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev,
u8 vlan_depth)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index e455b215c708..c981fa77f439 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -143,7 +143,7 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw,
if (mlx5_esw_indir_table_decap_vport(attr))
vport = mlx5_esw_indir_table_decap_vport(attr);
- if (attr && !attr->chain && esw_attr->int_port)
+ if (!attr->chain && esw_attr && esw_attr->int_port)
metadata =
mlx5e_tc_int_port_get_metadata_for_match(esw_attr->int_port);
else
@@ -4143,8 +4143,6 @@ int mlx5_devlink_port_fn_migratable_set(struct devlink_port *port, bool enable,
}
hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
- memcpy(hca_caps, MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability),
- MLX5_UN_SZ_BYTES(hca_cap_union));
MLX5_SET(cmd_hca_cap_2, hca_caps, migratable, 1);
err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport->vport,
@@ -4236,8 +4234,6 @@ int mlx5_devlink_port_fn_roce_set(struct devlink_port *port, bool enable,
}
hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
- memcpy(hca_caps, MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability),
- MLX5_UN_SZ_BYTES(hca_cap_union));
MLX5_SET(cmd_hca_cap, hca_caps, roce, enable);
err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport_num,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 86ed87d704f7..879555ba847d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -674,6 +674,13 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work)
dev = container_of(priv, struct mlx5_core_dev, priv);
devlink = priv_to_devlink(dev);
+ mutex_lock(&dev->intf_state_mutex);
+ if (test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) {
+ mlx5_core_err(dev, "health works are not permitted at this stage\n");
+ mutex_unlock(&dev->intf_state_mutex);
+ return;
+ }
+ mutex_unlock(&dev->intf_state_mutex);
enter_error_state(dev, false);
if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) {
devl_lock(devlink);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
index c247cca154e9..e09518f887a0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
@@ -90,9 +90,21 @@ static void mlx5i_get_ringparam(struct net_device *dev,
static int mlx5i_set_channels(struct net_device *dev,
struct ethtool_channels *ch)
{
- struct mlx5e_priv *priv = mlx5i_epriv(dev);
+ struct mlx5i_priv *ipriv = netdev_priv(dev);
+ struct mlx5e_priv *epriv = mlx5i_epriv(dev);
+
+ /* rtnl lock protects from race between this ethtool op and sub
+ * interface ndo_init/uninit.
+ */
+ ASSERT_RTNL();
+ if (ipriv->num_sub_interfaces > 0) {
+ mlx5_core_warn(epriv->mdev,
+ "can't change number of channels for interfaces with sub interfaces (%u)\n",
+ ipriv->num_sub_interfaces);
+ return -EINVAL;
+ }
- return mlx5e_ethtool_set_channels(priv, ch);
+ return mlx5e_ethtool_set_channels(epriv, ch);
}
static void mlx5i_get_channels(struct net_device *dev,
@@ -177,16 +189,16 @@ static inline int mlx5_ptys_rate_enum_to_int(enum mlx5_ptys_rate rate)
}
}
-static int mlx5i_get_speed_settings(u16 ib_link_width_oper, u16 ib_proto_oper)
+static u32 mlx5i_get_speed_settings(u16 ib_link_width_oper, u16 ib_proto_oper)
{
int rate, width;
rate = mlx5_ptys_rate_enum_to_int(ib_proto_oper);
if (rate < 0)
- return -EINVAL;
+ return SPEED_UNKNOWN;
width = mlx5_ptys_width_enum_to_int(ib_link_width_oper);
if (width < 0)
- return -EINVAL;
+ return SPEED_UNKNOWN;
return rate * width;
}
@@ -209,16 +221,13 @@ static int mlx5i_get_link_ksettings(struct net_device *netdev,
ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
speed = mlx5i_get_speed_settings(ib_link_width_oper, ib_proto_oper);
- if (speed < 0)
- return -EINVAL;
+ link_ksettings->base.speed = speed;
+ link_ksettings->base.duplex = speed == SPEED_UNKNOWN ? DUPLEX_UNKNOWN : DUPLEX_FULL;
- link_ksettings->base.duplex = DUPLEX_FULL;
link_ksettings->base.port = PORT_OTHER;
link_ksettings->base.autoneg = AUTONEG_DISABLE;
- link_ksettings->base.speed = speed;
-
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 7c5c500fd215..911cf4d23964 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -71,6 +71,10 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
params->packet_merge.type = MLX5E_PACKET_MERGE_NONE;
params->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN;
params->tunneled_offload_en = false;
+
+ /* CQE compression is not supported for IPoIB */
+ params->rx_cqe_compress_def = false;
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def);
}
/* Called directly after IPoIB netdevice was created to initialize SW structs */
@@ -156,6 +160,44 @@ void mlx5i_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
stats->tx_dropped = sstats->tx_queue_dropped;
}
+struct net_device *mlx5i_parent_get(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+ struct mlx5i_priv *ipriv, *parent_ipriv;
+ struct net_device *parent_dev;
+ int parent_ifindex;
+
+ ipriv = priv->ppriv;
+
+ parent_ifindex = netdev->netdev_ops->ndo_get_iflink(netdev);
+ parent_dev = dev_get_by_index(dev_net(netdev), parent_ifindex);
+ if (!parent_dev)
+ return NULL;
+
+ parent_ipriv = netdev_priv(parent_dev);
+
+ ASSERT_RTNL();
+ parent_ipriv->num_sub_interfaces++;
+
+ ipriv->parent_dev = parent_dev;
+
+ return parent_dev;
+}
+
+void mlx5i_parent_put(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+ struct mlx5i_priv *ipriv, *parent_ipriv;
+
+ ipriv = priv->ppriv;
+ parent_ipriv = netdev_priv(ipriv->parent_dev);
+
+ ASSERT_RTNL();
+ parent_ipriv->num_sub_interfaces--;
+
+ dev_put(ipriv->parent_dev);
+}
+
int mlx5i_init_underlay_qp(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
index 99d46fda9f82..f3f2af972020 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
@@ -54,9 +54,11 @@ struct mlx5i_priv {
struct rdma_netdev rn; /* keep this first */
u32 qpn;
bool sub_interface;
+ u32 num_sub_interfaces;
u32 qkey;
u16 pkey_index;
struct mlx5i_pkey_qpn_ht *qpn_htbl;
+ struct net_device *parent_dev;
char *mlx5e_priv[];
};
@@ -117,5 +119,9 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
struct mlx5_av *av, u32 dqpn, u32 dqkey, bool xmit_more);
void mlx5i_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats);
+/* Reference management for child to parent interfaces. */
+struct net_device *mlx5i_parent_get(struct net_device *netdev);
+void mlx5i_parent_put(struct net_device *netdev);
+
#endif /* CONFIG_MLX5_CORE_IPOIB */
#endif /* __MLX5E_IPOB_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
index 4d9c9e49645c..03e681297937 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
@@ -158,21 +158,28 @@ static int mlx5i_pkey_dev_init(struct net_device *dev)
struct mlx5e_priv *priv = mlx5i_epriv(dev);
struct mlx5i_priv *ipriv, *parent_ipriv;
struct net_device *parent_dev;
- int parent_ifindex;
ipriv = priv->ppriv;
- /* Get QPN to netdevice hash table from parent */
- parent_ifindex = dev->netdev_ops->ndo_get_iflink(dev);
- parent_dev = dev_get_by_index(dev_net(dev), parent_ifindex);
+ /* Link to parent */
+ parent_dev = mlx5i_parent_get(dev);
if (!parent_dev) {
mlx5_core_warn(priv->mdev, "failed to get parent device\n");
return -EINVAL;
}
+ if (dev->num_rx_queues < parent_dev->real_num_rx_queues) {
+ mlx5_core_warn(priv->mdev,
+ "failed to create child device with rx queues [%d] less than parent's [%d]\n",
+ dev->num_rx_queues,
+ parent_dev->real_num_rx_queues);
+ mlx5i_parent_put(dev);
+ return -EINVAL;
+ }
+
+ /* Get QPN to netdevice hash table from parent */
parent_ipriv = netdev_priv(parent_dev);
ipriv->qpn_htbl = parent_ipriv->qpn_htbl;
- dev_put(parent_dev);
return mlx5i_dev_init(dev);
}
@@ -184,6 +191,7 @@ static int mlx5i_pkey_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
static void mlx5i_pkey_dev_cleanup(struct net_device *netdev)
{
+ mlx5i_parent_put(netdev);
return mlx5i_dev_cleanup(netdev);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index 32c3e0a649a7..ad32b80e8501 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -228,6 +228,7 @@ static void mlx5_ldev_free(struct kref *ref)
if (ldev->nb.notifier_call)
unregister_netdevice_notifier_net(&init_net, &ldev->nb);
mlx5_lag_mp_cleanup(ldev);
+ cancel_delayed_work_sync(&ldev->bond_work);
destroy_workqueue(ldev->wq);
mlx5_lag_mpesw_cleanup(ldev);
mutex_destroy(&ldev->lock);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index 69cfe60c558a..69318b143268 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -681,7 +681,7 @@ static int mlx5_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
static const struct ptp_clock_info mlx5_ptp_clock_info = {
.owner = THIS_MODULE,
.name = "mlx5_ptp",
- .max_adj = 100000000,
+ .max_adj = 50000000,
.n_alarm = 0,
.n_ext_ts = 0,
.n_per_out = 0,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 7f5db13e3550..4e1b5757528a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -613,7 +613,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
MLX5_SET(cmd_hca_cap, set_hca_cap, num_total_dynamic_vf_msix,
MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix));
- if (MLX5_CAP_GEN(dev, roce_rw_supported))
+ if (MLX5_CAP_GEN(dev, roce_rw_supported) && MLX5_CAP_GEN_MAX(dev, roce))
MLX5_SET(cmd_hca_cap, set_hca_cap, roce,
mlx5_is_roce_on(dev));
@@ -1050,6 +1050,8 @@ err_rl_cleanup:
err_tables_cleanup:
mlx5_geneve_destroy(dev->geneve);
mlx5_vxlan_destroy(dev->vxlan);
+ mlx5_cleanup_clock(dev);
+ mlx5_cleanup_reserved_gids(dev);
mlx5_cq_debugfs_cleanup(dev);
mlx5_fw_reset_cleanup(dev);
err_events_cleanup:
@@ -2096,7 +2098,7 @@ static void mlx5_core_verify_params(void)
}
}
-static int __init init(void)
+static int __init mlx5_init(void)
{
int err;
@@ -2108,7 +2110,7 @@ static int __init init(void)
mlx5_core_verify_params();
mlx5_register_debugfs();
- err = pci_register_driver(&mlx5_core_driver);
+ err = mlx5e_init();
if (err)
goto err_debug;
@@ -2116,28 +2118,28 @@ static int __init init(void)
if (err)
goto err_sf;
- err = mlx5e_init();
+ err = pci_register_driver(&mlx5_core_driver);
if (err)
- goto err_en;
+ goto err_pci;
return 0;
-err_en:
+err_pci:
mlx5_sf_driver_unregister();
err_sf:
- pci_unregister_driver(&mlx5_core_driver);
+ mlx5e_cleanup();
err_debug:
mlx5_unregister_debugfs();
return err;
}
-static void __exit cleanup(void)
+static void __exit mlx5_cleanup(void)
{
- mlx5e_cleanup();
- mlx5_sf_driver_unregister();
pci_unregister_driver(&mlx5_core_driver);
+ mlx5_sf_driver_unregister();
+ mlx5e_cleanup();
mlx5_unregister_debugfs();
}
-module_init(init);
-module_exit(cleanup);
+module_init(mlx5_init);
+module_exit(mlx5_cleanup);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index 60596357bfc7..0eb50be175cc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -74,6 +74,14 @@ static u32 get_function(u16 func_id, bool ec_function)
return (u32)func_id | (ec_function << 16);
}
+static u16 func_id_to_type(struct mlx5_core_dev *dev, u16 func_id, bool ec_function)
+{
+ if (!func_id)
+ return mlx5_core_is_ecpf(dev) && !ec_function ? MLX5_HOST_PF : MLX5_PF;
+
+ return func_id <= mlx5_core_max_vfs(dev) ? MLX5_VF : MLX5_SF;
+}
+
static struct rb_root *page_root_per_function(struct mlx5_core_dev *dev, u32 function)
{
struct rb_root *root;
@@ -332,6 +340,7 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
int inlen = MLX5_ST_SZ_BYTES(manage_pages_in);
int notify_fail = event;
+ u16 func_type;
u64 addr;
int err;
u32 *in;
@@ -383,11 +392,9 @@ retry:
goto out_dropped;
}
+ func_type = func_id_to_type(dev, func_id, ec_function);
+ dev->priv.page_counters[func_type] += npages;
dev->priv.fw_pages += npages;
- if (func_id)
- dev->priv.vfs_pages += npages;
- else if (mlx5_core_is_ecpf(dev) && !ec_function)
- dev->priv.host_pf_pages += npages;
mlx5_core_dbg(dev, "npages %d, ec_function %d, func_id 0x%x, err %d\n",
npages, ec_function, func_id, err);
@@ -414,6 +421,7 @@ static void release_all_pages(struct mlx5_core_dev *dev, u16 func_id,
struct rb_root *root;
struct rb_node *p;
int npages = 0;
+ u16 func_type;
root = xa_load(&dev->priv.page_root_xa, function);
if (WARN_ON_ONCE(!root))
@@ -428,11 +436,9 @@ static void release_all_pages(struct mlx5_core_dev *dev, u16 func_id,
free_fwp(dev, fwp, fwp->free_count);
}
+ func_type = func_id_to_type(dev, func_id, ec_function);
+ dev->priv.page_counters[func_type] -= npages;
dev->priv.fw_pages -= npages;
- if (func_id)
- dev->priv.vfs_pages -= npages;
- else if (mlx5_core_is_ecpf(dev) && !ec_function)
- dev->priv.host_pf_pages -= npages;
mlx5_core_dbg(dev, "npages %d, ec_function %d, func_id 0x%x\n",
npages, ec_function, func_id);
@@ -498,6 +504,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
int outlen = MLX5_ST_SZ_BYTES(manage_pages_out);
u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {};
int num_claimed;
+ u16 func_type;
u32 *out;
int err;
int i;
@@ -549,11 +556,9 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
if (nclaimed)
*nclaimed = num_claimed;
+ func_type = func_id_to_type(dev, func_id, ec_function);
+ dev->priv.page_counters[func_type] -= num_claimed;
dev->priv.fw_pages -= num_claimed;
- if (func_id)
- dev->priv.vfs_pages -= num_claimed;
- else if (mlx5_core_is_ecpf(dev) && !ec_function)
- dev->priv.host_pf_pages -= num_claimed;
out_free:
kvfree(out);
@@ -706,12 +711,12 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
WARN(dev->priv.fw_pages,
"FW pages counter is %d after reclaiming all pages\n",
dev->priv.fw_pages);
- WARN(dev->priv.vfs_pages,
+ WARN(dev->priv.page_counters[MLX5_VF],
"VFs FW pages counter is %d after reclaiming all pages\n",
- dev->priv.vfs_pages);
- WARN(dev->priv.host_pf_pages,
+ dev->priv.page_counters[MLX5_VF]);
+ WARN(dev->priv.page_counters[MLX5_HOST_PF],
"External host PF FW pages counter is %d after reclaiming all pages\n",
- dev->priv.host_pf_pages);
+ dev->priv.page_counters[MLX5_HOST_PF]);
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/qos.c
index 0777be24a307..8bce730b5c5b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qos.c
@@ -62,13 +62,12 @@ int mlx5_qos_create_root_node(struct mlx5_core_dev *mdev, u32 *id)
return mlx5_qos_create_inner_node(mdev, MLX5_QOS_DEFAULT_DWRR_UID, 0, 0, id);
}
-int mlx5_qos_update_node(struct mlx5_core_dev *mdev, u32 parent_id,
+int mlx5_qos_update_node(struct mlx5_core_dev *mdev,
u32 bw_share, u32 max_avg_bw, u32 id)
{
u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
u32 bitmask = 0;
- MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id);
MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/qos.h
index 125e4e47e6f7..624ce822b7f5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qos.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qos.h
@@ -23,7 +23,7 @@ int mlx5_qos_create_leaf_node(struct mlx5_core_dev *mdev, u32 parent_id,
int mlx5_qos_create_inner_node(struct mlx5_core_dev *mdev, u32 parent_id,
u32 bw_share, u32 max_avg_bw, u32 *id);
int mlx5_qos_create_root_node(struct mlx5_core_dev *mdev, u32 *id);
-int mlx5_qos_update_node(struct mlx5_core_dev *mdev, u32 parent_id, u32 bw_share,
+int mlx5_qos_update_node(struct mlx5_core_dev *mdev, u32 bw_share,
u32 max_avg_bw, u32 id);
int mlx5_qos_destroy_node(struct mlx5_core_dev *mdev, u32 id);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
index c0e6c487c63c..3008e9ce2bbf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -147,7 +147,7 @@ mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf)
mlx5_eswitch_disable_sriov(dev->priv.eswitch, clear_vf);
- if (mlx5_wait_for_pages(dev, &dev->priv.vfs_pages))
+ if (mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_VF]))
mlx5_core_warn(dev, "timeout reclaiming VFs pages\n");
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
index 74cbe53ee9db..042ca0349124 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
@@ -3,7 +3,12 @@
#include "dr_types.h"
+#if defined(CONFIG_FRAME_WARN) && (CONFIG_FRAME_WARN < 2048)
+/* don't try to optimize STE allocation if the stack is too constaraining */
+#define DR_RULE_MAX_STES_OPTIMIZED 0
+#else
#define DR_RULE_MAX_STES_OPTIMIZED 5
+#endif
#define DR_RULE_MAX_STE_CHAIN_OPTIMIZED (DR_RULE_MAX_STES_OPTIMIZED + DR_ACTION_MAX_STES)
static int dr_rule_append_to_miss_list(struct mlx5dr_domain *dmn,
@@ -1133,12 +1138,14 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule,
rule->flow_source))
return 0;
+ mlx5dr_domain_nic_lock(nic_dmn);
+
ret = mlx5dr_matcher_select_builders(matcher,
nic_matcher,
dr_rule_get_ipv(&param->outer),
dr_rule_get_ipv(&param->inner));
if (ret)
- return ret;
+ goto err_unlock;
hw_ste_arr_is_opt = nic_matcher->num_of_builders <= DR_RULE_MAX_STES_OPTIMIZED;
if (likely(hw_ste_arr_is_opt)) {
@@ -1147,12 +1154,12 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule,
hw_ste_arr = kzalloc((nic_matcher->num_of_builders + DR_ACTION_MAX_STES) *
DR_STE_SIZE, GFP_KERNEL);
- if (!hw_ste_arr)
- return -ENOMEM;
+ if (!hw_ste_arr) {
+ ret = -ENOMEM;
+ goto err_unlock;
+ }
}
- mlx5dr_domain_nic_lock(nic_dmn);
-
ret = mlx5dr_matcher_add_to_tbl_nic(dmn, nic_matcher);
if (ret)
goto free_hw_ste;
@@ -1236,11 +1243,12 @@ remove_from_nic_tbl:
mlx5dr_matcher_remove_from_tbl_nic(dmn, nic_matcher);
free_hw_ste:
- mlx5dr_domain_nic_unlock(nic_dmn);
-
- if (unlikely(!hw_ste_arr_is_opt))
+ if (!hw_ste_arr_is_opt)
kfree(hw_ste_arr);
+err_unlock:
+ mlx5dr_domain_nic_unlock(nic_dmn);
+
return ret;
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index c22c3ac4e2a1..09e32778b012 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -2951,7 +2951,7 @@ struct mlxsw_sp_nexthop_group_info {
gateway:1, /* routes using the group use a gateway */
is_resilient:1;
struct list_head list; /* member in nh_res_grp_list */
- struct mlxsw_sp_nexthop nexthops[0];
+ struct mlxsw_sp_nexthop nexthops[];
#define nh_rif nexthops[0].rif
};
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
index 5314c064ceae..55b484b10562 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
@@ -608,12 +608,12 @@ allocate_new:
lan966x_fdma_rx_reload(rx);
}
- if (counter < weight && napi_complete_done(napi, counter))
- lan_wr(0xff, lan966x, FDMA_INTR_DB_ENA);
-
if (redirect)
xdp_do_flush();
+ if (counter < weight && napi_complete_done(napi, counter))
+ lan_wr(0xff, lan966x, FDMA_INTR_DB_ENA);
+
return counter;
}
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
index cadde20505ba..580c91d24a52 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
@@ -1043,11 +1043,6 @@ static int lan966x_probe(struct platform_device *pdev)
lan966x->base_mac[5] &= 0xf0;
}
- ports = device_get_named_child_node(&pdev->dev, "ethernet-ports");
- if (!ports)
- return dev_err_probe(&pdev->dev, -ENODEV,
- "no ethernet-ports child found\n");
-
err = lan966x_create_targets(pdev, lan966x);
if (err)
return dev_err_probe(&pdev->dev, err,
@@ -1125,6 +1120,11 @@ static int lan966x_probe(struct platform_device *pdev)
}
}
+ ports = device_get_named_child_node(&pdev->dev, "ethernet-ports");
+ if (!ports)
+ return dev_err_probe(&pdev->dev, -ENODEV,
+ "no ethernet-ports child found\n");
+
/* init switch */
lan966x_init(lan966x);
lan966x_stats_init(lan966x);
@@ -1162,6 +1162,8 @@ static int lan966x_probe(struct platform_device *pdev)
goto cleanup_ports;
}
+ fwnode_handle_put(ports);
+
lan966x_mdb_init(lan966x);
err = lan966x_fdb_init(lan966x);
if (err)
@@ -1191,6 +1193,7 @@ cleanup_fdb:
lan966x_fdb_deinit(lan966x);
cleanup_ports:
+ fwnode_handle_put(ports);
fwnode_handle_put(portnp);
lan966x_cleanup_ports(lan966x);
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c
index 1a61c6cdb077..0050fcb988b7 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c
@@ -381,7 +381,7 @@ int lan966x_port_pcs_set(struct lan966x_port *port,
}
/* Take PCS out of reset */
- lan_rmw(DEV_CLOCK_CFG_LINK_SPEED_SET(2) |
+ lan_rmw(DEV_CLOCK_CFG_LINK_SPEED_SET(LAN966X_SPEED_1000) |
DEV_CLOCK_CFG_PCS_RX_RST_SET(0) |
DEV_CLOCK_CFG_PCS_TX_RST_SET(0),
DEV_CLOCK_CFG_LINK_SPEED |
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c
index f9ebfaafbebc..a8348437dd87 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c
@@ -1073,6 +1073,9 @@ void lan966x_ptp_deinit(struct lan966x *lan966x)
struct lan966x_port *port;
int i;
+ if (!lan966x->ptp)
+ return;
+
for (i = 0; i < lan966x->num_phys_ports; i++) {
port = lan966x->ports[i];
if (!port)
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c
index d8dc9fbb81e1..a54c0426a35f 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c
@@ -95,10 +95,7 @@ lan966x_vcap_is2_get_port_keysets(struct net_device *dev, int lookup,
bool found = false;
u32 val;
- /* Check if the port keyset selection is enabled */
val = lan_rd(lan966x, ANA_VCAP_S2_CFG(port->chip_port));
- if (!ANA_VCAP_S2_CFG_ENA_GET(val))
- return -ENOENT;
/* Collect all keysets for the port in a list */
if (l3_proto == ETH_P_ALL)
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
index d25f4f09faa0..3c5d4fe99373 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
@@ -834,7 +834,7 @@ static int mchp_sparx5_probe(struct platform_device *pdev)
if (err)
goto cleanup_config;
- if (!of_get_mac_address(np, sparx5->base_mac)) {
+ if (of_get_mac_address(np, sparx5->base_mac)) {
dev_info(sparx5->dev, "MAC addr was not set, use random MAC\n");
eth_random_addr(sparx5->base_mac);
sparx5->base_mac[5] = 0;
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c b/drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c
index 0ed1ea7727c5..69e76634f9aa 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c
@@ -633,7 +633,7 @@ int sparx5_ptp_init(struct sparx5 *sparx5)
/* Enable master counters */
spx5_wr(PTP_PTP_DOM_CFG_PTP_ENA_SET(0x7), sparx5, PTP_PTP_DOM_CFG);
- for (i = 0; i < sparx5->port_count; i++) {
+ for (i = 0; i < SPX5_PORTS; i++) {
port = sparx5->ports[i];
if (!port)
continue;
@@ -649,7 +649,7 @@ void sparx5_ptp_deinit(struct sparx5 *sparx5)
struct sparx5_port *port;
int i;
- for (i = 0; i < sparx5->port_count; i++) {
+ for (i = 0; i < SPX5_PORTS; i++) {
port = sparx5->ports[i];
if (!port)
continue;
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index e708c2d04983..f9b8f372ec8a 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -1217,9 +1217,7 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev)
unsigned int max_queues_per_port = num_online_cpus();
struct gdma_context *gc = pci_get_drvdata(pdev);
struct gdma_irq_context *gic;
- unsigned int max_irqs;
- u16 *cpus;
- cpumask_var_t req_mask;
+ unsigned int max_irqs, cpu;
int nvec, irq;
int err, i = 0, j;
@@ -1240,39 +1238,31 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev)
goto free_irq_vector;
}
- if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL)) {
- err = -ENOMEM;
- goto free_irq;
- }
-
- cpus = kcalloc(nvec, sizeof(*cpus), GFP_KERNEL);
- if (!cpus) {
- err = -ENOMEM;
- goto free_mask;
- }
- for (i = 0; i < nvec; i++)
- cpus[i] = cpumask_local_spread(i, gc->numa_node);
-
for (i = 0; i < nvec; i++) {
- cpumask_set_cpu(cpus[i], req_mask);
gic = &gc->irq_contexts[i];
gic->handler = NULL;
gic->arg = NULL;
+ if (!i)
+ snprintf(gic->name, MANA_IRQ_NAME_SZ, "mana_hwc@pci:%s",
+ pci_name(pdev));
+ else
+ snprintf(gic->name, MANA_IRQ_NAME_SZ, "mana_q%d@pci:%s",
+ i - 1, pci_name(pdev));
+
irq = pci_irq_vector(pdev, i);
if (irq < 0) {
err = irq;
- goto free_mask;
+ goto free_irq;
}
- err = request_irq(irq, mana_gd_intr, 0, "mana_intr", gic);
+ err = request_irq(irq, mana_gd_intr, 0, gic->name, gic);
if (err)
- goto free_mask;
- irq_set_affinity_and_hint(irq, req_mask);
- cpumask_clear(req_mask);
+ goto free_irq;
+
+ cpu = cpumask_local_spread(i, gc->numa_node);
+ irq_set_affinity_and_hint(irq, cpumask_of(cpu));
}
- free_cpumask_var(req_mask);
- kfree(cpus);
err = mana_gd_alloc_res_map(nvec, &gc->msix_resource);
if (err)
@@ -1283,13 +1273,12 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev)
return 0;
-free_mask:
- free_cpumask_var(req_mask);
- kfree(cpus);
free_irq:
for (j = i - 1; j >= 0; j--) {
irq = pci_irq_vector(pdev, j);
gic = &gc->irq_contexts[j];
+
+ irq_update_affinity_hint(irq, NULL);
free_irq(irq, gic);
}
@@ -1317,6 +1306,9 @@ static void mana_gd_remove_irqs(struct pci_dev *pdev)
continue;
gic = &gc->irq_contexts[i];
+
+ /* Need to clear the hint before free_irq */
+ irq_update_affinity_hint(irq, NULL);
free_irq(irq, gic);
}
diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c
index 7c0897e779dc..ee052404eb55 100644
--- a/drivers/net/ethernet/mscc/ocelot_flower.c
+++ b/drivers/net/ethernet/mscc/ocelot_flower.c
@@ -605,6 +605,18 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress,
flow_rule_match_control(rule, &match);
}
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+ struct flow_match_vlan match;
+
+ flow_rule_match_vlan(rule, &match);
+ filter->key_type = OCELOT_VCAP_KEY_ANY;
+ filter->vlan.vid.value = match.key->vlan_id;
+ filter->vlan.vid.mask = match.mask->vlan_id;
+ filter->vlan.pcp.value[0] = match.key->vlan_priority;
+ filter->vlan.pcp.mask[0] = match.mask->vlan_priority;
+ match_protocol = false;
+ }
+
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
struct flow_match_eth_addrs match;
@@ -737,18 +749,6 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress,
match_protocol = false;
}
- if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
- struct flow_match_vlan match;
-
- flow_rule_match_vlan(rule, &match);
- filter->key_type = OCELOT_VCAP_KEY_ANY;
- filter->vlan.vid.value = match.key->vlan_id;
- filter->vlan.vid.mask = match.mask->vlan_id;
- filter->vlan.pcp.value[0] = match.key->vlan_priority;
- filter->vlan.pcp.mask[0] = match.mask->vlan_priority;
- match_protocol = false;
- }
-
finished_key_parsing:
if (match_protocol && proto != ETH_P_ALL) {
if (filter->block_id == VCAP_ES0) {
diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.c b/drivers/net/ethernet/mscc/ocelot_ptp.c
index 1a82f10c8853..2180ae94c744 100644
--- a/drivers/net/ethernet/mscc/ocelot_ptp.c
+++ b/drivers/net/ethernet/mscc/ocelot_ptp.c
@@ -335,8 +335,8 @@ static void
ocelot_populate_ipv6_ptp_event_trap_key(struct ocelot_vcap_filter *trap)
{
trap->key_type = OCELOT_VCAP_KEY_IPV6;
- trap->key.ipv4.proto.value[0] = IPPROTO_UDP;
- trap->key.ipv4.proto.mask[0] = 0xff;
+ trap->key.ipv6.proto.value[0] = IPPROTO_UDP;
+ trap->key.ipv6.proto.mask[0] = 0xff;
trap->key.ipv6.dport.value = PTP_EV_PORT;
trap->key.ipv6.dport.mask = 0xffff;
}
@@ -355,8 +355,8 @@ static void
ocelot_populate_ipv6_ptp_general_trap_key(struct ocelot_vcap_filter *trap)
{
trap->key_type = OCELOT_VCAP_KEY_IPV6;
- trap->key.ipv4.proto.value[0] = IPPROTO_UDP;
- trap->key.ipv4.proto.mask[0] = 0xff;
+ trap->key.ipv6.proto.value[0] = IPPROTO_UDP;
+ trap->key.ipv6.proto.mask[0] = 0xff;
trap->key.ipv6.dport.value = PTP_GEN_PORT;
trap->key.ipv6.dport.mask = 0xffff;
}
diff --git a/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c b/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c
index 4632268695cb..063cd371033a 100644
--- a/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c
+++ b/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c
@@ -129,26 +129,31 @@ struct nfp_ipsec_cfg_mssg {
};
};
-static int nfp_ipsec_cfg_cmd_issue(struct nfp_net *nn, int type, int saidx,
- struct nfp_ipsec_cfg_mssg *msg)
+static int nfp_net_ipsec_cfg(struct nfp_net *nn, struct nfp_mbox_amsg_entry *entry)
{
+ unsigned int offset = nn->tlv_caps.mbox_off + NFP_NET_CFG_MBOX_SIMPLE_VAL;
+ struct nfp_ipsec_cfg_mssg *msg = (struct nfp_ipsec_cfg_mssg *)entry->msg;
int i, msg_size, ret;
- msg->cmd = type;
- msg->sa_idx = saidx;
- msg->rsp = 0;
- msg_size = ARRAY_SIZE(msg->raw);
+ ret = nfp_net_mbox_lock(nn, sizeof(*msg));
+ if (ret)
+ return ret;
+ msg_size = ARRAY_SIZE(msg->raw);
for (i = 0; i < msg_size; i++)
- nn_writel(nn, NFP_NET_CFG_MBOX_VAL + 4 * i, msg->raw[i]);
+ nn_writel(nn, offset + 4 * i, msg->raw[i]);
- ret = nfp_net_mbox_reconfig(nn, NFP_NET_CFG_MBOX_CMD_IPSEC);
- if (ret < 0)
+ ret = nfp_net_mbox_reconfig(nn, entry->cmd);
+ if (ret < 0) {
+ nn_ctrl_bar_unlock(nn);
return ret;
+ }
/* For now we always read the whole message response back */
for (i = 0; i < msg_size; i++)
- msg->raw[i] = nn_readl(nn, NFP_NET_CFG_MBOX_VAL + 4 * i);
+ msg->raw[i] = nn_readl(nn, offset + 4 * i);
+
+ nn_ctrl_bar_unlock(nn);
switch (msg->rsp) {
case NFP_IPSEC_CFG_MSSG_OK:
@@ -477,7 +482,10 @@ static int nfp_net_xfrm_add_state(struct xfrm_state *x)
}
/* Allocate saidx and commit the SA */
- err = nfp_ipsec_cfg_cmd_issue(nn, NFP_IPSEC_CFG_MSSG_ADD_SA, saidx, &msg);
+ msg.cmd = NFP_IPSEC_CFG_MSSG_ADD_SA;
+ msg.sa_idx = saidx;
+ err = nfp_net_sched_mbox_amsg_work(nn, NFP_NET_CFG_MBOX_CMD_IPSEC, &msg,
+ sizeof(msg), nfp_net_ipsec_cfg);
if (err) {
xa_erase(&nn->xa_ipsec, saidx);
nn_err(nn, "Failed to issue IPsec command err ret=%d\n", err);
@@ -491,14 +499,17 @@ static int nfp_net_xfrm_add_state(struct xfrm_state *x)
static void nfp_net_xfrm_del_state(struct xfrm_state *x)
{
+ struct nfp_ipsec_cfg_mssg msg = {
+ .cmd = NFP_IPSEC_CFG_MSSG_INV_SA,
+ .sa_idx = x->xso.offload_handle - 1,
+ };
struct net_device *netdev = x->xso.dev;
- struct nfp_ipsec_cfg_mssg msg;
struct nfp_net *nn;
int err;
nn = netdev_priv(netdev);
- err = nfp_ipsec_cfg_cmd_issue(nn, NFP_IPSEC_CFG_MSSG_INV_SA,
- x->xso.offload_handle - 1, &msg);
+ err = nfp_net_sched_mbox_amsg_work(nn, NFP_NET_CFG_MBOX_CMD_IPSEC, &msg,
+ sizeof(msg), nfp_net_ipsec_cfg);
if (err)
nn_warn(nn, "Failed to invalidate SA in hardware\n");
diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
index a8678d5612ee..060a77f2265d 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
@@ -460,6 +460,7 @@ nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app,
sizeof(struct nfp_tun_neigh_v4);
unsigned long cookie = (unsigned long)neigh;
struct nfp_flower_priv *priv = app->priv;
+ struct nfp_tun_neigh_lag lag_info;
struct nfp_neigh_entry *nn_entry;
u32 port_id;
u8 mtype;
@@ -468,6 +469,11 @@ nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app,
if (!port_id)
return;
+ if ((port_id & NFP_FL_LAG_OUT) == NFP_FL_LAG_OUT) {
+ memset(&lag_info, 0, sizeof(struct nfp_tun_neigh_lag));
+ nfp_flower_lag_get_info_from_netdev(app, netdev, &lag_info);
+ }
+
spin_lock_bh(&priv->predt_lock);
nn_entry = rhashtable_lookup_fast(&priv->neigh_table, &cookie,
neigh_table_params);
@@ -515,7 +521,7 @@ nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app,
neigh_ha_snapshot(common->dst_addr, neigh, netdev);
if ((port_id & NFP_FL_LAG_OUT) == NFP_FL_LAG_OUT)
- nfp_flower_lag_get_info_from_netdev(app, netdev, lag);
+ memcpy(lag, &lag_info, sizeof(struct nfp_tun_neigh_lag));
common->port_id = cpu_to_be32(port_id);
if (rhashtable_insert_fast(&priv->neigh_table,
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index da33f09facb9..939cfce15830 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -617,6 +617,10 @@ struct nfp_net_dp {
* @vnic_no_name: For non-port PF vNIC make ndo_get_phys_port_name return
* -EOPNOTSUPP to keep backwards compatibility (set by app)
* @port: Pointer to nfp_port structure if vNIC is a port
+ * @mbox_amsg: Asynchronously processed message via mailbox
+ * @mbox_amsg.lock: Protect message list
+ * @mbox_amsg.list: List of message to process
+ * @mbox_amsg.work: Work to process message asynchronously
* @app_priv: APP private data for this vNIC
*/
struct nfp_net {
@@ -718,9 +722,25 @@ struct nfp_net {
struct nfp_port *port;
+ struct {
+ spinlock_t lock;
+ struct list_head list;
+ struct work_struct work;
+ } mbox_amsg;
+
void *app_priv;
};
+struct nfp_mbox_amsg_entry {
+ struct list_head list;
+ int (*cfg)(struct nfp_net *nn, struct nfp_mbox_amsg_entry *entry);
+ u32 cmd;
+ char msg[];
+};
+
+int nfp_net_sched_mbox_amsg_work(struct nfp_net *nn, u32 cmd, const void *data, size_t len,
+ int (*cb)(struct nfp_net *, struct nfp_mbox_amsg_entry *));
+
/* Functions to read/write from/to a BAR
* Performs any endian conversion necessary.
*/
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 09053373288f..70d7484c82af 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1334,9 +1334,54 @@ err_unlock:
return err;
}
-static int nfp_net_mc_cfg(struct net_device *netdev, const unsigned char *addr, const u32 cmd)
+int nfp_net_sched_mbox_amsg_work(struct nfp_net *nn, u32 cmd, const void *data, size_t len,
+ int (*cb)(struct nfp_net *, struct nfp_mbox_amsg_entry *))
{
- struct nfp_net *nn = netdev_priv(netdev);
+ struct nfp_mbox_amsg_entry *entry;
+
+ entry = kmalloc(sizeof(*entry) + len, GFP_ATOMIC);
+ if (!entry)
+ return -ENOMEM;
+
+ memcpy(entry->msg, data, len);
+ entry->cmd = cmd;
+ entry->cfg = cb;
+
+ spin_lock_bh(&nn->mbox_amsg.lock);
+ list_add_tail(&entry->list, &nn->mbox_amsg.list);
+ spin_unlock_bh(&nn->mbox_amsg.lock);
+
+ schedule_work(&nn->mbox_amsg.work);
+
+ return 0;
+}
+
+static void nfp_net_mbox_amsg_work(struct work_struct *work)
+{
+ struct nfp_net *nn = container_of(work, struct nfp_net, mbox_amsg.work);
+ struct nfp_mbox_amsg_entry *entry, *tmp;
+ struct list_head tmp_list;
+
+ INIT_LIST_HEAD(&tmp_list);
+
+ spin_lock_bh(&nn->mbox_amsg.lock);
+ list_splice_init(&nn->mbox_amsg.list, &tmp_list);
+ spin_unlock_bh(&nn->mbox_amsg.lock);
+
+ list_for_each_entry_safe(entry, tmp, &tmp_list, list) {
+ int err = entry->cfg(nn, entry);
+
+ if (err)
+ nn_err(nn, "Config cmd %d to HW failed %d.\n", entry->cmd, err);
+
+ list_del(&entry->list);
+ kfree(entry);
+ }
+}
+
+static int nfp_net_mc_cfg(struct nfp_net *nn, struct nfp_mbox_amsg_entry *entry)
+{
+ unsigned char *addr = entry->msg;
int ret;
ret = nfp_net_mbox_lock(nn, NFP_NET_CFG_MULTICAST_SZ);
@@ -1348,7 +1393,7 @@ static int nfp_net_mc_cfg(struct net_device *netdev, const unsigned char *addr,
nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_MULTICAST_MAC_LO,
get_unaligned_be16(addr + 4));
- return nfp_net_mbox_reconfig_and_unlock(nn, cmd);
+ return nfp_net_mbox_reconfig_and_unlock(nn, entry->cmd);
}
static int nfp_net_mc_sync(struct net_device *netdev, const unsigned char *addr)
@@ -1361,12 +1406,16 @@ static int nfp_net_mc_sync(struct net_device *netdev, const unsigned char *addr)
return -EINVAL;
}
- return nfp_net_mc_cfg(netdev, addr, NFP_NET_CFG_MBOX_CMD_MULTICAST_ADD);
+ return nfp_net_sched_mbox_amsg_work(nn, NFP_NET_CFG_MBOX_CMD_MULTICAST_ADD, addr,
+ NFP_NET_CFG_MULTICAST_SZ, nfp_net_mc_cfg);
}
static int nfp_net_mc_unsync(struct net_device *netdev, const unsigned char *addr)
{
- return nfp_net_mc_cfg(netdev, addr, NFP_NET_CFG_MBOX_CMD_MULTICAST_DEL);
+ struct nfp_net *nn = netdev_priv(netdev);
+
+ return nfp_net_sched_mbox_amsg_work(nn, NFP_NET_CFG_MBOX_CMD_MULTICAST_DEL, addr,
+ NFP_NET_CFG_MULTICAST_SZ, nfp_net_mc_cfg);
}
static void nfp_net_set_rx_mode(struct net_device *netdev)
@@ -2633,6 +2682,11 @@ int nfp_net_init(struct nfp_net *nn)
if (!nn->dp.netdev)
return 0;
+
+ spin_lock_init(&nn->mbox_amsg.lock);
+ INIT_LIST_HEAD(&nn->mbox_amsg.list);
+ INIT_WORK(&nn->mbox_amsg.work, nfp_net_mbox_amsg_work);
+
return register_netdev(nn->dp.netdev);
err_clean_mbox:
@@ -2652,5 +2706,6 @@ void nfp_net_clean(struct nfp_net *nn)
unregister_netdev(nn->dp.netdev);
nfp_net_ipsec_clean(nn);
nfp_ccm_mbox_clean(nn);
+ flush_work(&nn->mbox_amsg.work);
nfp_net_reconfig_wait_posted(nn);
}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
index 51124309ae1f..f03dcadff738 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
@@ -403,7 +403,6 @@
*/
#define NFP_NET_CFG_MBOX_BASE 0x1800
#define NFP_NET_CFG_MBOX_VAL_MAX_SZ 0x1F8
-#define NFP_NET_CFG_MBOX_VAL 0x1808
#define NFP_NET_CFG_MBOX_SIMPLE_CMD 0x0
#define NFP_NET_CFG_MBOX_SIMPLE_RET 0x4
#define NFP_NET_CFG_MBOX_SIMPLE_VAL 0x8
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index a4a89ef3f18b..cc97b3d00414 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -293,35 +293,131 @@ nfp_net_set_fec_link_mode(struct nfp_eth_table_port *eth_port,
}
}
-static const u16 nfp_eth_media_table[] = {
- [NFP_MEDIA_1000BASE_CX] = ETHTOOL_LINK_MODE_1000baseKX_Full_BIT,
- [NFP_MEDIA_1000BASE_KX] = ETHTOOL_LINK_MODE_1000baseKX_Full_BIT,
- [NFP_MEDIA_10GBASE_KX4] = ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT,
- [NFP_MEDIA_10GBASE_KR] = ETHTOOL_LINK_MODE_10000baseKR_Full_BIT,
- [NFP_MEDIA_10GBASE_CX4] = ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT,
- [NFP_MEDIA_10GBASE_CR] = ETHTOOL_LINK_MODE_10000baseCR_Full_BIT,
- [NFP_MEDIA_10GBASE_SR] = ETHTOOL_LINK_MODE_10000baseSR_Full_BIT,
- [NFP_MEDIA_10GBASE_ER] = ETHTOOL_LINK_MODE_10000baseER_Full_BIT,
- [NFP_MEDIA_25GBASE_KR] = ETHTOOL_LINK_MODE_25000baseKR_Full_BIT,
- [NFP_MEDIA_25GBASE_KR_S] = ETHTOOL_LINK_MODE_25000baseKR_Full_BIT,
- [NFP_MEDIA_25GBASE_CR] = ETHTOOL_LINK_MODE_25000baseCR_Full_BIT,
- [NFP_MEDIA_25GBASE_CR_S] = ETHTOOL_LINK_MODE_25000baseCR_Full_BIT,
- [NFP_MEDIA_25GBASE_SR] = ETHTOOL_LINK_MODE_25000baseSR_Full_BIT,
- [NFP_MEDIA_40GBASE_CR4] = ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT,
- [NFP_MEDIA_40GBASE_KR4] = ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT,
- [NFP_MEDIA_40GBASE_SR4] = ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT,
- [NFP_MEDIA_40GBASE_LR4] = ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT,
- [NFP_MEDIA_50GBASE_KR] = ETHTOOL_LINK_MODE_50000baseKR_Full_BIT,
- [NFP_MEDIA_50GBASE_SR] = ETHTOOL_LINK_MODE_50000baseSR_Full_BIT,
- [NFP_MEDIA_50GBASE_CR] = ETHTOOL_LINK_MODE_50000baseCR_Full_BIT,
- [NFP_MEDIA_50GBASE_LR] = ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT,
- [NFP_MEDIA_50GBASE_ER] = ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT,
- [NFP_MEDIA_50GBASE_FR] = ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT,
- [NFP_MEDIA_100GBASE_KR4] = ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT,
- [NFP_MEDIA_100GBASE_SR4] = ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT,
- [NFP_MEDIA_100GBASE_CR4] = ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT,
- [NFP_MEDIA_100GBASE_KP4] = ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT,
- [NFP_MEDIA_100GBASE_CR10] = ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT,
+static const struct nfp_eth_media_link_mode {
+ u16 ethtool_link_mode;
+ u16 speed;
+} nfp_eth_media_table[NFP_MEDIA_LINK_MODES_NUMBER] = {
+ [NFP_MEDIA_1000BASE_CX] = {
+ .ethtool_link_mode = ETHTOOL_LINK_MODE_1000baseKX_Full_BIT,
+ .speed = NFP_SPEED_1G,
+ },
+ [NFP_MEDIA_1000BASE_KX] = {
+ .ethtool_link_mode = ETHTOOL_LINK_MODE_1000baseKX_Full_BIT,
+ .speed = NFP_SPEED_1G,
+ },
+ [NFP_MEDIA_10GBASE_KX4] = {
+ .ethtool_link_mode = ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT,
+ .speed = NFP_SPEED_10G,
+ },
+ [NFP_MEDIA_10GBASE_KR] = {
+ .ethtool_link_mode = ETHTOOL_LINK_MODE_10000baseKR_Full_BIT,
+ .speed = NFP_SPEED_10G,
+ },
+ [NFP_MEDIA_10GBASE_CX4] = {
+ .ethtool_link_mode = ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT,
+ .speed = NFP_SPEED_10G,
+ },
+ [NFP_MEDIA_10GBASE_CR] = {
+ .ethtool_link_mode = ETHTOOL_LINK_MODE_10000baseCR_Full_BIT,
+ .speed = NFP_SPEED_10G,
+ },
+ [NFP_MEDIA_10GBASE_SR] = {
+ .ethtool_link_mode = ETHTOOL_LINK_MODE_10000baseSR_Full_BIT,
+ .speed = NFP_SPEED_10G,
+ },
+ [NFP_MEDIA_10GBASE_ER] = {
+ .ethtool_link_mode = ETHTOOL_LINK_MODE_10000baseER_Full_BIT,
+ .speed = NFP_SPEED_10G,
+ },
+ [NFP_MEDIA_25GBASE_KR] = {
+ .ethtool_link_mode = ETHTOOL_LINK_MODE_25000baseKR_Full_BIT,
+ .speed = NFP_SPEED_25G,
+ },
+ [NFP_MEDIA_25GBASE_KR_S] = {
+ .ethtool_link_mode = ETHTOOL_LINK_MODE_25000baseKR_Full_BIT,
+ .speed = NFP_SPEED_25G,
+ },
+ [NFP_MEDIA_25GBASE_CR] = {
+ .ethtool_link_mode = ETHTOOL_LINK_MODE_25000baseCR_Full_BIT,
+ .speed = NFP_SPEED_25G,
+ },
+ [NFP_MEDIA_25GBASE_CR_S] = {
+ .ethtool_link_mode = ETHTOOL_LINK_MODE_25000baseCR_Full_BIT,
+ .speed = NFP_SPEED_25G,
+ },
+ [NFP_MEDIA_25GBASE_SR] = {
+ .ethtool_link_mode = ETHTOOL_LINK_MODE_25000baseSR_Full_BIT,
+ .speed = NFP_SPEED_25G,
+ },
+ [NFP_MEDIA_40GBASE_CR4] = {
+ .ethtool_link_mode = ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT,
+ .speed = NFP_SPEED_40G,
+ },
+ [NFP_MEDIA_40GBASE_KR4] = {
+ .ethtool_link_mode = ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT,
+ .speed = NFP_SPEED_40G,
+ },
+ [NFP_MEDIA_40GBASE_SR4] = {
+ .ethtool_link_mode = ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT,
+ .speed = NFP_SPEED_40G,
+ },
+ [NFP_MEDIA_40GBASE_LR4] = {
+ .ethtool_link_mode = ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT,
+ .speed = NFP_SPEED_40G,
+ },
+ [NFP_MEDIA_50GBASE_KR] = {
+ .ethtool_link_mode = ETHTOOL_LINK_MODE_50000baseKR_Full_BIT,
+ .speed = NFP_SPEED_50G,
+ },
+ [NFP_MEDIA_50GBASE_SR] = {
+ .ethtool_link_mode = ETHTOOL_LINK_MODE_50000baseSR_Full_BIT,
+ .speed = NFP_SPEED_50G,
+ },
+ [NFP_MEDIA_50GBASE_CR] = {
+ .ethtool_link_mode = ETHTOOL_LINK_MODE_50000baseCR_Full_BIT,
+ .speed = NFP_SPEED_50G,
+ },
+ [NFP_MEDIA_50GBASE_LR] = {
+ .ethtool_link_mode = ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT,
+ .speed = NFP_SPEED_50G,
+ },
+ [NFP_MEDIA_50GBASE_ER] = {
+ .ethtool_link_mode = ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT,
+ .speed = NFP_SPEED_50G,
+ },
+ [NFP_MEDIA_50GBASE_FR] = {
+ .ethtool_link_mode = ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT,
+ .speed = NFP_SPEED_50G,
+ },
+ [NFP_MEDIA_100GBASE_KR4] = {
+ .ethtool_link_mode = ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT,
+ .speed = NFP_SPEED_100G,
+ },
+ [NFP_MEDIA_100GBASE_SR4] = {
+ .ethtool_link_mode = ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT,
+ .speed = NFP_SPEED_100G,
+ },
+ [NFP_MEDIA_100GBASE_CR4] = {
+ .ethtool_link_mode = ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT,
+ .speed = NFP_SPEED_100G,
+ },
+ [NFP_MEDIA_100GBASE_KP4] = {
+ .ethtool_link_mode = ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT,
+ .speed = NFP_SPEED_100G,
+ },
+ [NFP_MEDIA_100GBASE_CR10] = {
+ .ethtool_link_mode = ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT,
+ .speed = NFP_SPEED_100G,
+ },
+};
+
+static const unsigned int nfp_eth_speed_map[NFP_SUP_SPEED_NUMBER] = {
+ [NFP_SPEED_1G] = SPEED_1000,
+ [NFP_SPEED_10G] = SPEED_10000,
+ [NFP_SPEED_25G] = SPEED_25000,
+ [NFP_SPEED_40G] = SPEED_40000,
+ [NFP_SPEED_50G] = SPEED_50000,
+ [NFP_SPEED_100G] = SPEED_100000,
};
static void nfp_add_media_link_mode(struct nfp_port *port,
@@ -334,8 +430,12 @@ static void nfp_add_media_link_mode(struct nfp_port *port,
};
struct nfp_cpp *cpp = port->app->cpp;
- if (nfp_eth_read_media(cpp, &ethm))
+ if (nfp_eth_read_media(cpp, &ethm)) {
+ bitmap_fill(port->speed_bitmap, NFP_SUP_SPEED_NUMBER);
return;
+ }
+
+ bitmap_zero(port->speed_bitmap, NFP_SUP_SPEED_NUMBER);
for (u32 i = 0; i < 2; i++) {
supported_modes[i] = le64_to_cpu(ethm.supported_modes[i]);
@@ -344,20 +444,26 @@ static void nfp_add_media_link_mode(struct nfp_port *port,
for (u32 i = 0; i < NFP_MEDIA_LINK_MODES_NUMBER; i++) {
if (i < 64) {
- if (supported_modes[0] & BIT_ULL(i))
- __set_bit(nfp_eth_media_table[i],
+ if (supported_modes[0] & BIT_ULL(i)) {
+ __set_bit(nfp_eth_media_table[i].ethtool_link_mode,
cmd->link_modes.supported);
+ __set_bit(nfp_eth_media_table[i].speed,
+ port->speed_bitmap);
+ }
if (advertised_modes[0] & BIT_ULL(i))
- __set_bit(nfp_eth_media_table[i],
+ __set_bit(nfp_eth_media_table[i].ethtool_link_mode,
cmd->link_modes.advertising);
} else {
- if (supported_modes[1] & BIT_ULL(i - 64))
- __set_bit(nfp_eth_media_table[i],
+ if (supported_modes[1] & BIT_ULL(i - 64)) {
+ __set_bit(nfp_eth_media_table[i].ethtool_link_mode,
cmd->link_modes.supported);
+ __set_bit(nfp_eth_media_table[i].speed,
+ port->speed_bitmap);
+ }
if (advertised_modes[1] & BIT_ULL(i - 64))
- __set_bit(nfp_eth_media_table[i],
+ __set_bit(nfp_eth_media_table[i].ethtool_link_mode,
cmd->link_modes.advertising);
}
}
@@ -468,6 +574,22 @@ nfp_net_set_link_ksettings(struct net_device *netdev,
if (cmd->base.speed != SPEED_UNKNOWN) {
u32 speed = cmd->base.speed / eth_port->lanes;
+ bool is_supported = false;
+
+ for (u32 i = 0; i < NFP_SUP_SPEED_NUMBER; i++) {
+ if (cmd->base.speed == nfp_eth_speed_map[i] &&
+ test_bit(i, port->speed_bitmap)) {
+ is_supported = true;
+ break;
+ }
+ }
+
+ if (!is_supported) {
+ netdev_err(netdev, "Speed %u is not supported.\n",
+ cmd->base.speed);
+ err = -EINVAL;
+ goto err_bad_set;
+ }
if (req_aneg) {
netdev_err(netdev, "Speed changing is not allowed when working on autoneg mode.\n");
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.h b/drivers/net/ethernet/netronome/nfp/nfp_port.h
index f8cd157ca1d7..9c04f9f0e2c9 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_port.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_port.h
@@ -38,6 +38,16 @@ enum nfp_port_flags {
NFP_PORT_CHANGED = 0,
};
+enum {
+ NFP_SPEED_1G,
+ NFP_SPEED_10G,
+ NFP_SPEED_25G,
+ NFP_SPEED_40G,
+ NFP_SPEED_50G,
+ NFP_SPEED_100G,
+ NFP_SUP_SPEED_NUMBER
+};
+
/**
* struct nfp_port - structure representing NFP port
* @netdev: backpointer to associated netdev
@@ -52,6 +62,7 @@ enum nfp_port_flags {
* @eth_forced: for %NFP_PORT_PHYS_PORT port is forced UP or DOWN, don't change
* @eth_port: for %NFP_PORT_PHYS_PORT translated ETH Table port entry
* @eth_stats: for %NFP_PORT_PHYS_PORT MAC stats if available
+ * @speed_bitmap: for %NFP_PORT_PHYS_PORT supported speed bitmap
* @pf_id: for %NFP_PORT_PF_PORT, %NFP_PORT_VF_PORT ID of the PCI PF (0-3)
* @vf_id: for %NFP_PORT_VF_PORT ID of the PCI VF within @pf_id
* @pf_split: for %NFP_PORT_PF_PORT %true if PCI PF has more than one vNIC
@@ -78,6 +89,7 @@ struct nfp_port {
bool eth_forced;
struct nfp_eth_table_port *eth_port;
u8 __iomem *eth_stats;
+ DECLARE_BITMAP(speed_bitmap, NFP_SUP_SPEED_NUMBER);
};
/* NFP_PORT_PF_PORT, NFP_PORT_VF_PORT */
struct {
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
index 626b9113e7c4..d911f4fd9af6 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
@@ -708,9 +708,16 @@ void ionic_q_post(struct ionic_queue *q, bool ring_doorbell, ionic_desc_cb cb,
q->lif->index, q->name, q->hw_type, q->hw_index,
q->head_idx, ring_doorbell);
- if (ring_doorbell)
+ if (ring_doorbell) {
ionic_dbell_ring(lif->kern_dbpage, q->hw_type,
q->dbval | q->head_idx);
+
+ q->dbell_jiffies = jiffies;
+
+ if (q_to_qcq(q)->napi_qcq)
+ mod_timer(&q_to_qcq(q)->napi_qcq->napi_deadline,
+ jiffies + IONIC_NAPI_DEADLINE);
+ }
}
static bool ionic_q_is_posted(struct ionic_queue *q, unsigned int pos)
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
index 2a1d7b9c07e7..bce3ca38669b 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -25,6 +25,12 @@
#define IONIC_DEV_INFO_REG_COUNT 32
#define IONIC_DEV_CMD_REG_COUNT 32
+#define IONIC_NAPI_DEADLINE (HZ / 200) /* 5ms */
+#define IONIC_ADMIN_DOORBELL_DEADLINE (HZ / 2) /* 500ms */
+#define IONIC_TX_DOORBELL_DEADLINE (HZ / 100) /* 10ms */
+#define IONIC_RX_MIN_DOORBELL_DEADLINE (HZ / 100) /* 10ms */
+#define IONIC_RX_MAX_DOORBELL_DEADLINE (HZ * 5) /* 5s */
+
struct ionic_dev_bar {
void __iomem *vaddr;
phys_addr_t bus_addr;
@@ -216,6 +222,8 @@ struct ionic_queue {
struct ionic_lif *lif;
struct ionic_desc_info *info;
u64 dbval;
+ unsigned long dbell_deadline;
+ unsigned long dbell_jiffies;
u16 head_idx;
u16 tail_idx;
unsigned int index;
@@ -361,4 +369,8 @@ void ionic_q_service(struct ionic_queue *q, struct ionic_cq_info *cq_info,
int ionic_heartbeat_check(struct ionic *ionic);
bool ionic_is_fw_running(struct ionic_dev *idev);
+bool ionic_adminq_poke_doorbell(struct ionic_queue *q);
+bool ionic_txq_poke_doorbell(struct ionic_queue *q);
+bool ionic_rxq_poke_doorbell(struct ionic_queue *q);
+
#endif /* _IONIC_DEV_H_ */
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 4dd16c487f2b..63a78a9ac241 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -16,6 +16,7 @@
#include "ionic.h"
#include "ionic_bus.h"
+#include "ionic_dev.h"
#include "ionic_lif.h"
#include "ionic_txrx.h"
#include "ionic_ethtool.h"
@@ -200,6 +201,13 @@ void ionic_link_status_check_request(struct ionic_lif *lif, bool can_sleep)
}
}
+static void ionic_napi_deadline(struct timer_list *timer)
+{
+ struct ionic_qcq *qcq = container_of(timer, struct ionic_qcq, napi_deadline);
+
+ napi_schedule(&qcq->napi);
+}
+
static irqreturn_t ionic_isr(int irq, void *data)
{
struct napi_struct *napi = data;
@@ -269,6 +277,7 @@ static int ionic_qcq_enable(struct ionic_qcq *qcq)
.oper = IONIC_Q_ENABLE,
},
};
+ int ret;
idev = &lif->ionic->idev;
dev = lif->ionic->dev;
@@ -276,16 +285,24 @@ static int ionic_qcq_enable(struct ionic_qcq *qcq)
dev_dbg(dev, "q_enable.index %d q_enable.qtype %d\n",
ctx.cmd.q_control.index, ctx.cmd.q_control.type);
+ if (qcq->flags & IONIC_QCQ_F_INTR)
+ ionic_intr_clean(idev->intr_ctrl, qcq->intr.index);
+
+ ret = ionic_adminq_post_wait(lif, &ctx);
+ if (ret)
+ return ret;
+
+ if (qcq->napi.poll)
+ napi_enable(&qcq->napi);
+
if (qcq->flags & IONIC_QCQ_F_INTR) {
irq_set_affinity_hint(qcq->intr.vector,
&qcq->intr.affinity_mask);
- napi_enable(&qcq->napi);
- ionic_intr_clean(idev->intr_ctrl, qcq->intr.index);
ionic_intr_mask(idev->intr_ctrl, qcq->intr.index,
IONIC_INTR_MASK_CLEAR);
}
- return ionic_adminq_post_wait(lif, &ctx);
+ return 0;
}
static int ionic_qcq_disable(struct ionic_lif *lif, struct ionic_qcq *qcq, int fw_err)
@@ -316,6 +333,7 @@ static int ionic_qcq_disable(struct ionic_lif *lif, struct ionic_qcq *qcq, int f
synchronize_irq(qcq->intr.vector);
irq_set_affinity_hint(qcq->intr.vector, NULL);
napi_disable(&qcq->napi);
+ del_timer_sync(&qcq->napi_deadline);
}
/* If there was a previous fw communcation error, don't bother with
@@ -451,6 +469,7 @@ static void ionic_link_qcq_interrupts(struct ionic_qcq *src_qcq,
n_qcq->intr.vector = src_qcq->intr.vector;
n_qcq->intr.index = src_qcq->intr.index;
+ n_qcq->napi_qcq = src_qcq->napi_qcq;
}
static int ionic_alloc_qcq_interrupt(struct ionic_lif *lif, struct ionic_qcq *qcq)
@@ -564,13 +583,15 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type,
}
if (flags & IONIC_QCQ_F_NOTIFYQ) {
- int q_size, cq_size;
+ int q_size;
- /* q & cq need to be contiguous in case of notifyq */
+ /* q & cq need to be contiguous in NotifyQ, so alloc it all in q
+ * and don't alloc qc. We leave new->qc_size and new->qc_base
+ * as 0 to be sure we don't try to free it later.
+ */
q_size = ALIGN(num_descs * desc_size, PAGE_SIZE);
- cq_size = ALIGN(num_descs * cq_desc_size, PAGE_SIZE);
-
- new->q_size = PAGE_SIZE + q_size + cq_size;
+ new->q_size = PAGE_SIZE + q_size +
+ ALIGN(num_descs * cq_desc_size, PAGE_SIZE);
new->q_base = dma_alloc_coherent(dev, new->q_size,
&new->q_base_pa, GFP_KERNEL);
if (!new->q_base) {
@@ -773,8 +794,14 @@ static int ionic_lif_txq_init(struct ionic_lif *lif, struct ionic_qcq *qcq)
dev_dbg(dev, "txq->hw_type %d\n", q->hw_type);
dev_dbg(dev, "txq->hw_index %d\n", q->hw_index);
- if (test_bit(IONIC_LIF_F_SPLIT_INTR, lif->state))
+ q->dbell_deadline = IONIC_TX_DOORBELL_DEADLINE;
+ q->dbell_jiffies = jiffies;
+
+ if (test_bit(IONIC_LIF_F_SPLIT_INTR, lif->state)) {
netif_napi_add(lif->netdev, &qcq->napi, ionic_tx_napi);
+ qcq->napi_qcq = qcq;
+ timer_setup(&qcq->napi_deadline, ionic_napi_deadline, 0);
+ }
qcq->flags |= IONIC_QCQ_F_INITED;
@@ -828,11 +855,17 @@ static int ionic_lif_rxq_init(struct ionic_lif *lif, struct ionic_qcq *qcq)
dev_dbg(dev, "rxq->hw_type %d\n", q->hw_type);
dev_dbg(dev, "rxq->hw_index %d\n", q->hw_index);
+ q->dbell_deadline = IONIC_RX_MIN_DOORBELL_DEADLINE;
+ q->dbell_jiffies = jiffies;
+
if (test_bit(IONIC_LIF_F_SPLIT_INTR, lif->state))
netif_napi_add(lif->netdev, &qcq->napi, ionic_rx_napi);
else
netif_napi_add(lif->netdev, &qcq->napi, ionic_txrx_napi);
+ qcq->napi_qcq = qcq;
+ timer_setup(&qcq->napi_deadline, ionic_napi_deadline, 0);
+
qcq->flags |= IONIC_QCQ_F_INITED;
return 0;
@@ -1150,6 +1183,7 @@ static int ionic_adminq_napi(struct napi_struct *napi, int budget)
struct ionic_dev *idev = &lif->ionic->idev;
unsigned long irqflags;
unsigned int flags = 0;
+ bool resched = false;
int rx_work = 0;
int tx_work = 0;
int n_work = 0;
@@ -1187,6 +1221,16 @@ static int ionic_adminq_napi(struct napi_struct *napi, int budget)
ionic_intr_credits(idev->intr_ctrl, intr->index, credits, flags);
}
+ if (!a_work && ionic_adminq_poke_doorbell(&lif->adminqcq->q))
+ resched = true;
+ if (lif->hwstamp_rxq && !rx_work && ionic_rxq_poke_doorbell(&lif->hwstamp_rxq->q))
+ resched = true;
+ if (lif->hwstamp_txq && !tx_work && ionic_txq_poke_doorbell(&lif->hwstamp_txq->q))
+ resched = true;
+ if (resched)
+ mod_timer(&lif->adminqcq->napi_deadline,
+ jiffies + IONIC_NAPI_DEADLINE);
+
return work_done;
}
@@ -3245,8 +3289,14 @@ static int ionic_lif_adminq_init(struct ionic_lif *lif)
dev_dbg(dev, "adminq->hw_type %d\n", q->hw_type);
dev_dbg(dev, "adminq->hw_index %d\n", q->hw_index);
+ q->dbell_deadline = IONIC_ADMIN_DOORBELL_DEADLINE;
+ q->dbell_jiffies = jiffies;
+
netif_napi_add(lif->netdev, &qcq->napi, ionic_adminq_napi);
+ qcq->napi_qcq = qcq;
+ timer_setup(&qcq->napi_deadline, ionic_napi_deadline, 0);
+
napi_enable(&qcq->napi);
if (qcq->flags & IONIC_QCQ_F_INTR)
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
index a53984bf3544..734519895614 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
@@ -74,8 +74,10 @@ struct ionic_qcq {
struct ionic_queue q;
struct ionic_cq cq;
struct ionic_intr_info intr;
+ struct timer_list napi_deadline;
struct napi_struct napi;
unsigned int flags;
+ struct ionic_qcq *napi_qcq;
struct dentry *dentry;
};
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c
index a13530ec4dd8..08c42b039d92 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_main.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c
@@ -289,6 +289,35 @@ static void ionic_adminq_cb(struct ionic_queue *q,
complete_all(&ctx->work);
}
+bool ionic_adminq_poke_doorbell(struct ionic_queue *q)
+{
+ struct ionic_lif *lif = q->lif;
+ unsigned long now, then, dif;
+ unsigned long irqflags;
+
+ spin_lock_irqsave(&lif->adminq_lock, irqflags);
+
+ if (q->tail_idx == q->head_idx) {
+ spin_unlock_irqrestore(&lif->adminq_lock, irqflags);
+ return false;
+ }
+
+ now = READ_ONCE(jiffies);
+ then = q->dbell_jiffies;
+ dif = now - then;
+
+ if (dif > q->dbell_deadline) {
+ ionic_dbell_ring(q->lif->kern_dbpage, q->hw_type,
+ q->dbval | q->head_idx);
+
+ q->dbell_jiffies = now;
+ }
+
+ spin_unlock_irqrestore(&lif->adminq_lock, irqflags);
+
+ return true;
+}
+
int ionic_adminq_post(struct ionic_lif *lif, struct ionic_admin_ctx *ctx)
{
struct ionic_desc_info *desc_info;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index 0c3977416cd1..f761780f0162 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -22,6 +22,67 @@ static inline void ionic_rxq_post(struct ionic_queue *q, bool ring_dbell,
ionic_q_post(q, ring_dbell, cb_func, cb_arg);
}
+bool ionic_txq_poke_doorbell(struct ionic_queue *q)
+{
+ unsigned long now, then, dif;
+ struct netdev_queue *netdev_txq;
+ struct net_device *netdev;
+
+ netdev = q->lif->netdev;
+ netdev_txq = netdev_get_tx_queue(netdev, q->index);
+
+ HARD_TX_LOCK(netdev, netdev_txq, smp_processor_id());
+
+ if (q->tail_idx == q->head_idx) {
+ HARD_TX_UNLOCK(netdev, netdev_txq);
+ return false;
+ }
+
+ now = READ_ONCE(jiffies);
+ then = q->dbell_jiffies;
+ dif = now - then;
+
+ if (dif > q->dbell_deadline) {
+ ionic_dbell_ring(q->lif->kern_dbpage, q->hw_type,
+ q->dbval | q->head_idx);
+
+ q->dbell_jiffies = now;
+ }
+
+ HARD_TX_UNLOCK(netdev, netdev_txq);
+
+ return true;
+}
+
+bool ionic_rxq_poke_doorbell(struct ionic_queue *q)
+{
+ unsigned long now, then, dif;
+
+ /* no lock, called from rx napi or txrx napi, nothing else can fill */
+
+ if (q->tail_idx == q->head_idx)
+ return false;
+
+ now = READ_ONCE(jiffies);
+ then = q->dbell_jiffies;
+ dif = now - then;
+
+ if (dif > q->dbell_deadline) {
+ ionic_dbell_ring(q->lif->kern_dbpage, q->hw_type,
+ q->dbval | q->head_idx);
+
+ q->dbell_jiffies = now;
+
+ dif = 2 * q->dbell_deadline;
+ if (dif > IONIC_RX_MAX_DOORBELL_DEADLINE)
+ dif = IONIC_RX_MAX_DOORBELL_DEADLINE;
+
+ q->dbell_deadline = dif;
+ }
+
+ return true;
+}
+
static inline struct netdev_queue *q_to_ndq(struct ionic_queue *q)
{
return netdev_get_tx_queue(q->lif->netdev, q->index);
@@ -424,6 +485,12 @@ void ionic_rx_fill(struct ionic_queue *q)
ionic_dbell_ring(q->lif->kern_dbpage, q->hw_type,
q->dbval | q->head_idx);
+
+ q->dbell_deadline = IONIC_RX_MIN_DOORBELL_DEADLINE;
+ q->dbell_jiffies = jiffies;
+
+ mod_timer(&q_to_qcq(q)->napi_qcq->napi_deadline,
+ jiffies + IONIC_NAPI_DEADLINE);
}
void ionic_rx_empty(struct ionic_queue *q)
@@ -511,6 +578,9 @@ int ionic_tx_napi(struct napi_struct *napi, int budget)
work_done, flags);
}
+ if (!work_done && ionic_txq_poke_doorbell(&qcq->q))
+ mod_timer(&qcq->napi_deadline, jiffies + IONIC_NAPI_DEADLINE);
+
return work_done;
}
@@ -544,23 +614,29 @@ int ionic_rx_napi(struct napi_struct *napi, int budget)
work_done, flags);
}
+ if (!work_done && ionic_rxq_poke_doorbell(&qcq->q))
+ mod_timer(&qcq->napi_deadline, jiffies + IONIC_NAPI_DEADLINE);
+
return work_done;
}
int ionic_txrx_napi(struct napi_struct *napi, int budget)
{
- struct ionic_qcq *qcq = napi_to_qcq(napi);
+ struct ionic_qcq *rxqcq = napi_to_qcq(napi);
struct ionic_cq *rxcq = napi_to_cq(napi);
unsigned int qi = rxcq->bound_q->index;
+ struct ionic_qcq *txqcq;
struct ionic_dev *idev;
struct ionic_lif *lif;
struct ionic_cq *txcq;
+ bool resched = false;
u32 rx_work_done = 0;
u32 tx_work_done = 0;
u32 flags = 0;
lif = rxcq->bound_q->lif;
idev = &lif->ionic->idev;
+ txqcq = lif->txqcqs[qi];
txcq = &lif->txqcqs[qi]->cq;
tx_work_done = ionic_cq_service(txcq, IONIC_TX_BUDGET_DEFAULT,
@@ -572,7 +648,7 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget)
ionic_rx_fill(rxcq->bound_q);
if (rx_work_done < budget && napi_complete_done(napi, rx_work_done)) {
- ionic_dim_update(qcq, 0);
+ ionic_dim_update(rxqcq, 0);
flags |= IONIC_INTR_CRED_UNMASK;
rxcq->bound_intr->rearm_count++;
}
@@ -583,6 +659,13 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget)
tx_work_done + rx_work_done, flags);
}
+ if (!rx_work_done && ionic_rxq_poke_doorbell(&rxqcq->q))
+ resched = true;
+ if (!tx_work_done && ionic_txq_poke_doorbell(&txqcq->q))
+ resched = true;
+ if (resched)
+ mod_timer(&rxqcq->napi_deadline, jiffies + IONIC_NAPI_DEADLINE);
+
return rx_work_done;
}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c
index 86ecb080b153..cdcead614e9f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_debug.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c
@@ -1832,7 +1832,8 @@ static enum dbg_status qed_find_nvram_image(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt,
u32 image_type,
u32 *nvram_offset_bytes,
- u32 *nvram_size_bytes)
+ u32 *nvram_size_bytes,
+ bool b_can_sleep)
{
u32 ret_mcp_resp, ret_mcp_param, ret_txn_size;
struct mcp_file_att file_att;
@@ -1846,7 +1847,8 @@ static enum dbg_status qed_find_nvram_image(struct qed_hwfn *p_hwfn,
&ret_mcp_resp,
&ret_mcp_param,
&ret_txn_size,
- (u32 *)&file_att, false);
+ (u32 *)&file_att,
+ b_can_sleep);
/* Check response */
if (nvm_result || (ret_mcp_resp & FW_MSG_CODE_MASK) !=
@@ -1873,7 +1875,9 @@ static enum dbg_status qed_find_nvram_image(struct qed_hwfn *p_hwfn,
static enum dbg_status qed_nvram_read(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt,
u32 nvram_offset_bytes,
- u32 nvram_size_bytes, u32 *ret_buf)
+ u32 nvram_size_bytes,
+ u32 *ret_buf,
+ bool b_can_sleep)
{
u32 ret_mcp_resp, ret_mcp_param, ret_read_size, bytes_to_copy;
s32 bytes_left = nvram_size_bytes;
@@ -1899,7 +1903,7 @@ static enum dbg_status qed_nvram_read(struct qed_hwfn *p_hwfn,
&ret_mcp_resp,
&ret_mcp_param, &ret_read_size,
(u32 *)((u8 *)ret_buf + read_offset),
- false))
+ b_can_sleep))
return DBG_STATUS_NVRAM_READ_FAILED;
/* Check response */
@@ -3380,7 +3384,8 @@ static u32 qed_grc_dump_mcp_hw_dump(struct qed_hwfn *p_hwfn,
p_ptt,
NVM_TYPE_HW_DUMP_OUT,
&hw_dump_offset_bytes,
- &hw_dump_size_bytes);
+ &hw_dump_size_bytes,
+ false);
if (status != DBG_STATUS_OK)
return 0;
@@ -3397,7 +3402,9 @@ static u32 qed_grc_dump_mcp_hw_dump(struct qed_hwfn *p_hwfn,
status = qed_nvram_read(p_hwfn,
p_ptt,
hw_dump_offset_bytes,
- hw_dump_size_bytes, dump_buf + offset);
+ hw_dump_size_bytes,
+ dump_buf + offset,
+ false);
if (status != DBG_STATUS_OK) {
DP_NOTICE(p_hwfn,
"Failed to read MCP HW Dump image from NVRAM\n");
@@ -4123,7 +4130,9 @@ static enum dbg_status qed_mcp_trace_get_meta_info(struct qed_hwfn *p_hwfn,
return qed_find_nvram_image(p_hwfn,
p_ptt,
nvram_image_type,
- trace_meta_offset, trace_meta_size);
+ trace_meta_offset,
+ trace_meta_size,
+ true);
}
/* Reads the MCP Trace meta data from NVRAM into the specified buffer */
@@ -4139,7 +4148,10 @@ static enum dbg_status qed_mcp_trace_read_meta(struct qed_hwfn *p_hwfn,
/* Read meta data from NVRAM */
status = qed_nvram_read(p_hwfn,
p_ptt,
- nvram_offset_in_bytes, size_in_bytes, buf);
+ nvram_offset_in_bytes,
+ size_in_bytes,
+ buf,
+ true);
if (status != DBG_STATUS_OK)
return status;
diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c
index 7c2af482192d..cb1746bc0e0c 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_fp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c
@@ -1438,6 +1438,10 @@ int qede_poll(struct napi_struct *napi, int budget)
rx_work_done = (likely(fp->type & QEDE_FASTPATH_RX) &&
qede_has_rx_work(fp->rxq)) ?
qede_rx_int(fp, budget) : 0;
+
+ if (fp->xdp_xmit & QEDE_XDP_REDIRECT)
+ xdp_do_flush();
+
/* Handle case where we are called by netpoll with a budget of 0 */
if (rx_work_done < budget || !budget) {
if (!qede_poll_is_more_work(fp)) {
@@ -1457,9 +1461,6 @@ int qede_poll(struct napi_struct *napi, int budget)
qede_update_tx_producer(fp->xdp_tx);
}
- if (fp->xdp_xmit & QEDE_XDP_REDIRECT)
- xdp_do_flush_map();
-
return rx_work_done;
}
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
index dbb800769cb6..c95d56e56c59 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
@@ -2505,7 +2505,13 @@ int qlcnic_83xx_init(struct qlcnic_adapter *adapter)
goto disable_mbx_intr;
qlcnic_83xx_clear_function_resources(adapter);
- qlcnic_dcb_enable(adapter->dcb);
+
+ err = qlcnic_dcb_enable(adapter->dcb);
+ if (err) {
+ qlcnic_dcb_free(adapter->dcb);
+ goto disable_mbx_intr;
+ }
+
qlcnic_83xx_initialize_nic(adapter, 1);
qlcnic_dcb_get_info(adapter->dcb);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h
index 7519773eaca6..22afa2be85fd 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h
@@ -41,11 +41,6 @@ struct qlcnic_dcb {
unsigned long state;
};
-static inline void qlcnic_clear_dcb_ops(struct qlcnic_dcb *dcb)
-{
- kfree(dcb);
-}
-
static inline int qlcnic_dcb_get_hw_capability(struct qlcnic_dcb *dcb)
{
if (dcb && dcb->ops->get_hw_capability)
@@ -112,9 +107,8 @@ static inline void qlcnic_dcb_init_dcbnl_ops(struct qlcnic_dcb *dcb)
dcb->ops->init_dcbnl_ops(dcb);
}
-static inline void qlcnic_dcb_enable(struct qlcnic_dcb *dcb)
+static inline int qlcnic_dcb_enable(struct qlcnic_dcb *dcb)
{
- if (dcb && qlcnic_dcb_attach(dcb))
- qlcnic_clear_dcb_ops(dcb);
+ return dcb ? qlcnic_dcb_attach(dcb) : 0;
}
#endif
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index 28476b982bab..44dac3c0908e 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -2599,7 +2599,13 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
"Device does not support MSI interrupts\n");
if (qlcnic_82xx_check(adapter)) {
- qlcnic_dcb_enable(adapter->dcb);
+ err = qlcnic_dcb_enable(adapter->dcb);
+ if (err) {
+ qlcnic_dcb_free(adapter->dcb);
+ dev_err(&pdev->dev, "Failed to enable DCB\n");
+ goto err_out_free_hw;
+ }
+
qlcnic_dcb_get_info(adapter->dcb);
err = qlcnic_setup_intr(adapter);
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index a9dcc98b6af1..dadd61bccfe7 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -1996,10 +1996,7 @@ static enum mac_version rtl8169_get_mac_version(u16 xid, bool gmii)
/* 8168F family. */
{ 0x7c8, 0x488, RTL_GIGA_MAC_VER_38 },
- /* It seems this chip version never made it to
- * the wild. Let's disable detection.
- * { 0x7cf, 0x481, RTL_GIGA_MAC_VER_36 },
- */
+ { 0x7cf, 0x481, RTL_GIGA_MAC_VER_36 },
{ 0x7cf, 0x480, RTL_GIGA_MAC_VER_35 },
/* 8168E family. */
@@ -2210,28 +2207,6 @@ static int rtl_set_mac_address(struct net_device *dev, void *p)
return 0;
}
-static void rtl_wol_enable_rx(struct rtl8169_private *tp)
-{
- if (tp->mac_version >= RTL_GIGA_MAC_VER_25)
- RTL_W32(tp, RxConfig, RTL_R32(tp, RxConfig) |
- AcceptBroadcast | AcceptMulticast | AcceptMyPhys);
-}
-
-static void rtl_prepare_power_down(struct rtl8169_private *tp)
-{
- if (tp->dash_type != RTL_DASH_NONE)
- return;
-
- if (tp->mac_version == RTL_GIGA_MAC_VER_32 ||
- tp->mac_version == RTL_GIGA_MAC_VER_33)
- rtl_ephy_write(tp, 0x19, 0xff64);
-
- if (device_may_wakeup(tp_to_dev(tp))) {
- phy_speed_down(tp->phydev, false);
- rtl_wol_enable_rx(tp);
- }
-}
-
static void rtl_init_rxcfg(struct rtl8169_private *tp)
{
switch (tp->mac_version) {
@@ -2455,6 +2430,31 @@ static void rtl_enable_rxdvgate(struct rtl8169_private *tp)
rtl_wait_txrx_fifo_empty(tp);
}
+static void rtl_wol_enable_rx(struct rtl8169_private *tp)
+{
+ if (tp->mac_version >= RTL_GIGA_MAC_VER_25)
+ RTL_W32(tp, RxConfig, RTL_R32(tp, RxConfig) |
+ AcceptBroadcast | AcceptMulticast | AcceptMyPhys);
+
+ if (tp->mac_version >= RTL_GIGA_MAC_VER_40)
+ rtl_disable_rxdvgate(tp);
+}
+
+static void rtl_prepare_power_down(struct rtl8169_private *tp)
+{
+ if (tp->dash_type != RTL_DASH_NONE)
+ return;
+
+ if (tp->mac_version == RTL_GIGA_MAC_VER_32 ||
+ tp->mac_version == RTL_GIGA_MAC_VER_33)
+ rtl_ephy_write(tp, 0x19, 0xff64);
+
+ if (device_may_wakeup(tp_to_dev(tp))) {
+ phy_speed_down(tp->phydev, false);
+ rtl_wol_enable_rx(tp);
+ }
+}
+
static void rtl_set_tx_config_registers(struct rtl8169_private *tp)
{
u32 val = TX_DMA_BURST << TxDMAShift |
@@ -3872,7 +3872,7 @@ static void rtl8169_tx_clear(struct rtl8169_private *tp)
netdev_reset_queue(tp->dev);
}
-static void rtl8169_cleanup(struct rtl8169_private *tp, bool going_down)
+static void rtl8169_cleanup(struct rtl8169_private *tp)
{
napi_disable(&tp->napi);
@@ -3884,9 +3884,6 @@ static void rtl8169_cleanup(struct rtl8169_private *tp, bool going_down)
rtl_rx_close(tp);
- if (going_down && tp->dev->wol_enabled)
- goto no_reset;
-
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_28:
case RTL_GIGA_MAC_VER_31:
@@ -3907,7 +3904,7 @@ static void rtl8169_cleanup(struct rtl8169_private *tp, bool going_down)
}
rtl_hw_reset(tp);
-no_reset:
+
rtl8169_tx_clear(tp);
rtl8169_init_ring_indexes(tp);
}
@@ -3918,7 +3915,7 @@ static void rtl_reset_work(struct rtl8169_private *tp)
netif_stop_queue(tp->dev);
- rtl8169_cleanup(tp, false);
+ rtl8169_cleanup(tp);
for (i = 0; i < NUM_RX_DESC; i++)
rtl8169_mark_to_asic(tp->RxDescArray + i);
@@ -4605,7 +4602,7 @@ static void rtl8169_down(struct rtl8169_private *tp)
pci_clear_master(tp->pci_dev);
rtl_pci_commit(tp);
- rtl8169_cleanup(tp, true);
+ rtl8169_cleanup(tp);
rtl_disable_exit_l1(tp);
rtl_prepare_power_down(tp);
}
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index b4e0fc7f65bd..0f54849a3823 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -1101,14 +1101,14 @@ static void ravb_error_interrupt(struct net_device *ndev)
ravb_write(ndev, ~(EIS_QFS | EIS_RESERVED), EIS);
if (eis & EIS_QFS) {
ris2 = ravb_read(ndev, RIS2);
- ravb_write(ndev, ~(RIS2_QFF0 | RIS2_RFFF | RIS2_RESERVED),
+ ravb_write(ndev, ~(RIS2_QFF0 | RIS2_QFF1 | RIS2_RFFF | RIS2_RESERVED),
RIS2);
/* Receive Descriptor Empty int */
if (ris2 & RIS2_QFF0)
priv->stats[RAVB_BE].rx_over_errors++;
- /* Receive Descriptor Empty int */
+ /* Receive Descriptor Empty int */
if (ris2 & RIS2_QFF1)
priv->stats[RAVB_NC].rx_over_errors++;
@@ -2973,6 +2973,9 @@ static int __maybe_unused ravb_suspend(struct device *dev)
else
ret = ravb_close(ndev);
+ if (priv->info->ccc_gac)
+ ravb_ptp_stop(ndev);
+
return ret;
}
@@ -3011,6 +3014,9 @@ static int __maybe_unused ravb_resume(struct device *dev)
/* Restore descriptor base address table */
ravb_write(ndev, priv->desc_bat_dma, DBAT);
+ if (priv->info->ccc_gac)
+ ravb_ptp_init(ndev, priv->pdev);
+
if (netif_running(ndev)) {
if (priv->wol_enabled) {
ret = ravb_wol_restore(ndev);
diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c
index e42ceaa0099f..2370c7797a0a 100644
--- a/drivers/net/ethernet/renesas/rswitch.c
+++ b/drivers/net/ethernet/renesas/rswitch.c
@@ -1074,8 +1074,11 @@ static struct device_node *rswitch_get_port_node(struct rswitch_device *rdev)
port = NULL;
goto out;
}
- if (index == rdev->etha->index)
+ if (index == rdev->etha->index) {
+ if (!of_device_is_available(port))
+ port = NULL;
break;
+ }
}
out:
@@ -1106,7 +1109,7 @@ static int rswitch_etha_get_params(struct rswitch_device *rdev)
port = rswitch_get_port_node(rdev);
if (!port)
- return -ENODEV;
+ return 0; /* ignored */
err = of_get_phy_mode(port, &rdev->etha->phy_interface);
of_node_put(port);
@@ -1324,13 +1327,13 @@ static int rswitch_ether_port_init_all(struct rswitch_private *priv)
{
int i, err;
- for (i = 0; i < RSWITCH_NUM_PORTS; i++) {
+ rswitch_for_each_enabled_port(priv, i) {
err = rswitch_ether_port_init_one(priv->rdev[i]);
if (err)
goto err_init_one;
}
- for (i = 0; i < RSWITCH_NUM_PORTS; i++) {
+ rswitch_for_each_enabled_port(priv, i) {
err = rswitch_serdes_init(priv->rdev[i]);
if (err)
goto err_serdes;
@@ -1339,12 +1342,12 @@ static int rswitch_ether_port_init_all(struct rswitch_private *priv)
return 0;
err_serdes:
- for (i--; i >= 0; i--)
+ rswitch_for_each_enabled_port_continue_reverse(priv, i)
rswitch_serdes_deinit(priv->rdev[i]);
i = RSWITCH_NUM_PORTS;
err_init_one:
- for (i--; i >= 0; i--)
+ rswitch_for_each_enabled_port_continue_reverse(priv, i)
rswitch_ether_port_deinit_one(priv->rdev[i]);
return err;
@@ -1578,6 +1581,7 @@ static int rswitch_device_alloc(struct rswitch_private *priv, int index)
{
struct platform_device *pdev = priv->pdev;
struct rswitch_device *rdev;
+ struct device_node *port;
struct net_device *ndev;
int err;
@@ -1606,7 +1610,10 @@ static int rswitch_device_alloc(struct rswitch_private *priv, int index)
netif_napi_add(ndev, &rdev->napi, rswitch_poll);
- err = of_get_ethdev_address(pdev->dev.of_node, ndev);
+ port = rswitch_get_port_node(rdev);
+ rdev->disabled = !port;
+ err = of_get_ethdev_address(port, ndev);
+ of_node_put(port);
if (err) {
if (is_valid_ether_addr(rdev->etha->mac_addr))
eth_hw_addr_set(ndev, rdev->etha->mac_addr);
@@ -1704,16 +1711,16 @@ static int rswitch_init(struct rswitch_private *priv)
if (err)
goto err_ether_port_init_all;
- for (i = 0; i < RSWITCH_NUM_PORTS; i++) {
+ rswitch_for_each_enabled_port(priv, i) {
err = register_netdev(priv->rdev[i]->ndev);
if (err) {
- for (i--; i >= 0; i--)
+ rswitch_for_each_enabled_port_continue_reverse(priv, i)
unregister_netdev(priv->rdev[i]->ndev);
goto err_register_netdev;
}
}
- for (i = 0; i < RSWITCH_NUM_PORTS; i++)
+ rswitch_for_each_enabled_port(priv, i)
netdev_info(priv->rdev[i]->ndev, "MAC address %pM\n",
priv->rdev[i]->ndev->dev_addr);
@@ -1786,6 +1793,11 @@ static int renesas_eth_sw_probe(struct platform_device *pdev)
pm_runtime_get_sync(&pdev->dev);
ret = rswitch_init(priv);
+ if (ret < 0) {
+ pm_runtime_put(&pdev->dev);
+ pm_runtime_disable(&pdev->dev);
+ return ret;
+ }
device_set_wakeup_capable(&pdev->dev, 1);
diff --git a/drivers/net/ethernet/renesas/rswitch.h b/drivers/net/ethernet/renesas/rswitch.h
index edbdd1b98d3d..49efb0f31c77 100644
--- a/drivers/net/ethernet/renesas/rswitch.h
+++ b/drivers/net/ethernet/renesas/rswitch.h
@@ -13,6 +13,17 @@
#define RSWITCH_MAX_NUM_QUEUES 128
#define RSWITCH_NUM_PORTS 3
+#define rswitch_for_each_enabled_port(priv, i) \
+ for (i = 0; i < RSWITCH_NUM_PORTS; i++) \
+ if (priv->rdev[i]->disabled) \
+ continue; \
+ else
+
+#define rswitch_for_each_enabled_port_continue_reverse(priv, i) \
+ for (i--; i >= 0; i--) \
+ if (priv->rdev[i]->disabled) \
+ continue; \
+ else
#define TX_RING_SIZE 1024
#define RX_RING_SIZE 1024
@@ -938,6 +949,7 @@ struct rswitch_device {
struct rswitch_gwca_queue *tx_queue;
struct rswitch_gwca_queue *rx_queue;
u8 ts_tag;
+ bool disabled;
int port;
struct rswitch_etha *etha;
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 0556542d7a6b..3a86f1213a05 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -1003,8 +1003,11 @@ static int efx_pci_probe_post_io(struct efx_nic *efx)
/* Determine netdevice features */
net_dev->features |= (efx->type->offload_features | NETIF_F_SG |
NETIF_F_TSO | NETIF_F_RXCSUM | NETIF_F_RXALL);
- if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
+ if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) {
net_dev->features |= NETIF_F_TSO6;
+ if (efx_has_cap(efx, TX_TSO_V2_ENCAP))
+ net_dev->hw_enc_features |= NETIF_F_TSO6;
+ }
/* Check whether device supports TSO */
if (!efx->type->tso_versions || !efx->type->tso_versions(efx))
net_dev->features &= ~NETIF_F_ALL_TSO;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
index d42e1afb6521..2f7d8e4561d9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
@@ -90,7 +90,6 @@ struct mediatek_dwmac_plat_data {
struct mediatek_dwmac_variant {
int (*dwmac_set_phy_interface)(struct mediatek_dwmac_plat_data *plat);
int (*dwmac_set_delay)(struct mediatek_dwmac_plat_data *plat);
- void (*dwmac_fix_mac_speed)(void *priv, unsigned int speed);
/* clock ids to be requested */
const char * const *clk_list;
@@ -443,32 +442,9 @@ static int mt8195_set_delay(struct mediatek_dwmac_plat_data *plat)
return 0;
}
-static void mt8195_fix_mac_speed(void *priv, unsigned int speed)
-{
- struct mediatek_dwmac_plat_data *priv_plat = priv;
-
- if ((phy_interface_mode_is_rgmii(priv_plat->phy_mode))) {
- /* prefer 2ns fixed delay which is controlled by TXC_PHASE_CTRL,
- * when link speed is 1Gbps with RGMII interface,
- * Fall back to delay macro circuit for 10/100Mbps link speed.
- */
- if (speed == SPEED_1000)
- regmap_update_bits(priv_plat->peri_regmap,
- MT8195_PERI_ETH_CTRL0,
- MT8195_RGMII_TXC_PHASE_CTRL |
- MT8195_DLY_GTXC_ENABLE |
- MT8195_DLY_GTXC_INV |
- MT8195_DLY_GTXC_STAGES,
- MT8195_RGMII_TXC_PHASE_CTRL);
- else
- mt8195_set_delay(priv_plat);
- }
-}
-
static const struct mediatek_dwmac_variant mt8195_gmac_variant = {
.dwmac_set_phy_interface = mt8195_set_interface,
.dwmac_set_delay = mt8195_set_delay,
- .dwmac_fix_mac_speed = mt8195_fix_mac_speed,
.clk_list = mt8195_dwmac_clk_l,
.num_clks = ARRAY_SIZE(mt8195_dwmac_clk_l),
.dma_bit_mask = 35,
@@ -619,8 +595,6 @@ static int mediatek_dwmac_common_data(struct platform_device *pdev,
plat->bsp_priv = priv_plat;
plat->init = mediatek_dwmac_init;
plat->clks_config = mediatek_dwmac_clks_config;
- if (priv_plat->variant->dwmac_fix_mac_speed)
- plat->fix_mac_speed = priv_plat->variant->dwmac_fix_mac_speed;
plat->safety_feat_cfg = devm_kzalloc(&pdev->dev,
sizeof(*plat->safety_feat_cfg),
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
index 835caa15d55f..732774645c1a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
@@ -560,6 +560,8 @@ static int qcom_ethqos_probe(struct platform_device *pdev)
plat_dat->has_gmac4 = 1;
plat_dat->pmt = 1;
plat_dat->tso_en = of_property_read_bool(np, "snps,tso");
+ if (of_device_is_compatible(np, "qcom,qcs404-ethqos"))
+ plat_dat->rx_clk_runs_in_lpi = 1;
ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
if (ret)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
index 9c2d40f853ed..e95d35f1e5a0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
@@ -186,11 +186,25 @@ static void dwmac5_handle_dma_err(struct net_device *ndev,
int dwmac5_safety_feat_config(void __iomem *ioaddr, unsigned int asp,
struct stmmac_safety_feature_cfg *safety_feat_cfg)
{
+ struct stmmac_safety_feature_cfg all_safety_feats = {
+ .tsoee = 1,
+ .mrxpee = 1,
+ .mestee = 1,
+ .mrxee = 1,
+ .mtxee = 1,
+ .epsi = 1,
+ .edpp = 1,
+ .prtyen = 1,
+ .tmouten = 1,
+ };
u32 value;
if (!asp)
return -EINVAL;
+ if (!safety_feat_cfg)
+ safety_feat_cfg = &all_safety_feats;
+
/* 1. Enable Safety Features */
value = readl(ioaddr + MTL_ECC_CONTROL);
value |= MEEAO; /* MTL ECC Error Addr Status Override */
@@ -527,9 +541,9 @@ int dwmac5_flex_pps_config(void __iomem *ioaddr, int index,
return 0;
}
- val |= PPSCMDx(index, 0x2);
val |= TRGTMODSELx(index, 0x2);
val |= PPSEN0;
+ writel(val, ioaddr + MAC_PPS_CONTROL);
writel(cfg->start.tv_sec, ioaddr + MAC_PPSx_TARGET_TIME_SEC(index));
@@ -554,6 +568,7 @@ int dwmac5_flex_pps_config(void __iomem *ioaddr, int index,
writel(period - 1, ioaddr + MAC_PPSx_WIDTH(index));
/* Finally, activate it */
+ val |= PPSCMDx(index, 0x2);
writel(val, ioaddr + MAC_PPS_CONTROL);
return 0;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index f453b0d09366..35c8dd92d369 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -551,16 +551,16 @@ static void stmmac_get_per_qstats(struct stmmac_priv *priv, u64 *data)
p = (char *)priv + offsetof(struct stmmac_priv,
xstats.txq_stats[q].tx_pkt_n);
for (stat = 0; stat < STMMAC_TXQ_STATS; stat++) {
- *data++ = (*(u64 *)p);
- p += sizeof(u64 *);
+ *data++ = (*(unsigned long *)p);
+ p += sizeof(unsigned long);
}
}
for (q = 0; q < rx_cnt; q++) {
p = (char *)priv + offsetof(struct stmmac_priv,
xstats.rxq_stats[q].rx_pkt_n);
for (stat = 0; stat < STMMAC_RXQ_STATS; stat++) {
- *data++ = (*(u64 *)p);
- p += sizeof(u64 *);
+ *data++ = (*(unsigned long *)p);
+ p += sizeof(unsigned long);
}
}
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index c6951c976f5d..1a5b8dab5e9b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1080,7 +1080,8 @@ static void stmmac_mac_link_up(struct phylink_config *config,
stmmac_mac_set(priv, priv->ioaddr, true);
if (phy && priv->dma_cap.eee) {
- priv->eee_active = phy_init_eee(phy, 1) >= 0;
+ priv->eee_active =
+ phy_init_eee(phy, !priv->plat->rx_clk_runs_in_lpi) >= 0;
priv->eee_enabled = stmmac_eee_init(priv);
priv->tx_lpi_enabled = priv->eee_enabled;
stmmac_set_eee_pls(priv, priv->hw, true);
@@ -1150,6 +1151,11 @@ static int stmmac_init_phy(struct net_device *dev)
int addr = priv->plat->phy_addr;
struct phy_device *phydev;
+ if (addr < 0) {
+ netdev_err(priv->dev, "no phy found\n");
+ return -ENODEV;
+ }
+
phydev = mdiobus_get_phy(priv->mii, addr);
if (!phydev) {
netdev_err(priv->dev, "no phy at addr %d\n", addr);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index eb6d9cd8e93f..0046a4ee6e64 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -559,7 +559,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
dma_cfg->mixed_burst = of_property_read_bool(np, "snps,mixed-burst");
plat->force_thresh_dma_mode = of_property_read_bool(np, "snps,force_thresh_dma_mode");
- if (plat->force_thresh_dma_mode) {
+ if (plat->force_thresh_dma_mode && plat->force_sf_dma_mode) {
plat->force_sf_dma_mode = 0;
dev_warn(&pdev->dev,
"force_sf_dma_mode is ignored if force_thresh_dma_mode is set.\n");
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
index fc06ddeac0d5..b4388ca8d211 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
@@ -210,7 +210,10 @@ static int stmmac_enable(struct ptp_clock_info *ptp,
}
writel(acr_value, ptpaddr + PTP_ACR);
mutex_unlock(&priv->aux_ts_lock);
- ret = 0;
+ /* wait for auxts fifo clear to finish */
+ ret = readl_poll_timeout(ptpaddr + PTP_ACR, acr_value,
+ !(acr_value & PTP_ACR_ATSFC),
+ 10, 10000);
break;
default:
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index ecbde83b5243..6cda4b7c10cb 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -501,7 +501,15 @@ static int am65_cpsw_nuss_common_stop(struct am65_cpsw_common *common)
k3_udma_glue_disable_tx_chn(common->tx_chns[i].tx_chn);
}
+ reinit_completion(&common->tdown_complete);
k3_udma_glue_tdown_rx_chn(common->rx_chns.rx_chn, true);
+
+ if (common->pdata.quirks & AM64_CPSW_QUIRK_DMA_RX_TDOWN_IRQ) {
+ i = wait_for_completion_timeout(&common->tdown_complete, msecs_to_jiffies(1000));
+ if (!i)
+ dev_err(common->dev, "rx teardown timeout\n");
+ }
+
napi_disable(&common->napi_rx);
for (i = 0; i < AM65_CPSW_MAX_RX_FLOWS; i++)
@@ -721,6 +729,8 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common,
if (cppi5_desc_is_tdcm(desc_dma)) {
dev_dbg(dev, "%s RX tdown flow: %u\n", __func__, flow_idx);
+ if (common->pdata.quirks & AM64_CPSW_QUIRK_DMA_RX_TDOWN_IRQ)
+ complete(&common->tdown_complete);
return 0;
}
@@ -2672,7 +2682,7 @@ static const struct am65_cpsw_pdata j721e_pdata = {
};
static const struct am65_cpsw_pdata am64x_cpswxg_pdata = {
- .quirks = 0,
+ .quirks = AM64_CPSW_QUIRK_DMA_RX_TDOWN_IRQ,
.ale_dev_id = "am64-cpswxg",
.fdqring_mode = K3_RINGACC_RING_MODE_RING,
};
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h
index 4b75620f8d28..e5f1c44788c1 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h
@@ -90,6 +90,7 @@ struct am65_cpsw_rx_chn {
};
#define AM65_CPSW_QUIRK_I2027_NO_TX_CSUM BIT(0)
+#define AM64_CPSW_QUIRK_DMA_RX_TDOWN_IRQ BIT(1)
struct am65_cpsw_pdata {
u32 quirks;
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 9352dad58996..79f4e13620a4 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -987,9 +987,6 @@ static void netvsc_copy_to_send_buf(struct netvsc_device *net_device,
void netvsc_dma_unmap(struct hv_device *hv_dev,
struct hv_netvsc_packet *packet)
{
- u32 page_count = packet->cp_partial ?
- packet->page_buf_cnt - packet->rmsg_pgcnt :
- packet->page_buf_cnt;
int i;
if (!hv_is_isolation_supported())
@@ -998,7 +995,7 @@ void netvsc_dma_unmap(struct hv_device *hv_dev,
if (!packet->dma_range)
return;
- for (i = 0; i < page_count; i++)
+ for (i = 0; i < packet->page_buf_cnt; i++)
dma_unmap_single(&hv_dev->device, packet->dma_range[i].dma,
packet->dma_range[i].mapping_size,
DMA_TO_DEVICE);
@@ -1028,9 +1025,7 @@ static int netvsc_dma_map(struct hv_device *hv_dev,
struct hv_netvsc_packet *packet,
struct hv_page_buffer *pb)
{
- u32 page_count = packet->cp_partial ?
- packet->page_buf_cnt - packet->rmsg_pgcnt :
- packet->page_buf_cnt;
+ u32 page_count = packet->page_buf_cnt;
dma_addr_t dma;
int i;
@@ -1039,7 +1034,7 @@ static int netvsc_dma_map(struct hv_device *hv_dev,
packet->dma_range = kcalloc(page_count,
sizeof(*packet->dma_range),
- GFP_KERNEL);
+ GFP_ATOMIC);
if (!packet->dma_range)
return -ENOMEM;
diff --git a/drivers/net/ipa/data/ipa_data-v4.7.c b/drivers/net/ipa/data/ipa_data-v4.7.c
index 7552c400961e..b83390c48615 100644
--- a/drivers/net/ipa/data/ipa_data-v4.7.c
+++ b/drivers/net/ipa/data/ipa_data-v4.7.c
@@ -357,7 +357,7 @@ static const struct ipa_mem ipa_mem_local_data[] = {
static const struct ipa_mem_data ipa_mem_data = {
.local_count = ARRAY_SIZE(ipa_mem_local_data),
.local = ipa_mem_local_data,
- .imem_addr = 0x146a9000,
+ .imem_addr = 0x146a8000,
.imem_size = 0x00002000,
.smem_id = 497,
.smem_size = 0x00009000,
diff --git a/drivers/net/ipa/ipa_interrupt.c b/drivers/net/ipa/ipa_interrupt.c
index a49f66efacb8..c1b3977e1ae4 100644
--- a/drivers/net/ipa/ipa_interrupt.c
+++ b/drivers/net/ipa/ipa_interrupt.c
@@ -127,15 +127,25 @@ out_power_put:
return IRQ_HANDLED;
}
+void ipa_interrupt_irq_disable(struct ipa *ipa)
+{
+ disable_irq(ipa->interrupt->irq);
+}
+
+void ipa_interrupt_irq_enable(struct ipa *ipa)
+{
+ enable_irq(ipa->interrupt->irq);
+}
+
/* Common function used to enable/disable TX_SUSPEND for an endpoint */
static void ipa_interrupt_suspend_control(struct ipa_interrupt *interrupt,
u32 endpoint_id, bool enable)
{
struct ipa *ipa = interrupt->ipa;
+ u32 mask = BIT(endpoint_id % 32);
u32 unit = endpoint_id / 32;
const struct ipa_reg *reg;
u32 offset;
- u32 mask;
u32 val;
WARN_ON(!test_bit(endpoint_id, ipa->available));
@@ -148,7 +158,6 @@ static void ipa_interrupt_suspend_control(struct ipa_interrupt *interrupt,
offset = ipa_reg_n_offset(reg, unit);
val = ioread32(ipa->reg_virt + offset);
- mask = BIT(endpoint_id);
if (enable)
val |= mask;
else
diff --git a/drivers/net/ipa/ipa_interrupt.h b/drivers/net/ipa/ipa_interrupt.h
index f31fd9965fdc..8a1bd5b89393 100644
--- a/drivers/net/ipa/ipa_interrupt.h
+++ b/drivers/net/ipa/ipa_interrupt.h
@@ -86,6 +86,22 @@ void ipa_interrupt_suspend_clear_all(struct ipa_interrupt *interrupt);
void ipa_interrupt_simulate_suspend(struct ipa_interrupt *interrupt);
/**
+ * ipa_interrupt_irq_enable() - Enable IPA interrupts
+ * @ipa: IPA pointer
+ *
+ * This enables the IPA interrupt line
+ */
+void ipa_interrupt_irq_enable(struct ipa *ipa);
+
+/**
+ * ipa_interrupt_irq_disable() - Disable IPA interrupts
+ * @ipa: IPA pointer
+ *
+ * This disables the IPA interrupt line
+ */
+void ipa_interrupt_irq_disable(struct ipa *ipa);
+
+/**
* ipa_interrupt_config() - Configure the IPA interrupt framework
* @ipa: IPA pointer
*
diff --git a/drivers/net/ipa/ipa_power.c b/drivers/net/ipa/ipa_power.c
index 8420f93128a2..8057be8cda80 100644
--- a/drivers/net/ipa/ipa_power.c
+++ b/drivers/net/ipa/ipa_power.c
@@ -181,6 +181,17 @@ static int ipa_suspend(struct device *dev)
__set_bit(IPA_POWER_FLAG_SYSTEM, ipa->power->flags);
+ /* Increment the disable depth to ensure that the IRQ won't
+ * be re-enabled until the matching _enable call in
+ * ipa_resume(). We do this to ensure that the interrupt
+ * handler won't run whilst PM runtime is disabled.
+ *
+ * Note that disabling the IRQ is NOT the same as disabling
+ * irq wake. If wakeup is enabled for the IPA then the IRQ
+ * will still cause the system to wake up, see irq_set_irq_wake().
+ */
+ ipa_interrupt_irq_disable(ipa);
+
return pm_runtime_force_suspend(dev);
}
@@ -193,6 +204,12 @@ static int ipa_resume(struct device *dev)
__clear_bit(IPA_POWER_FLAG_SYSTEM, ipa->power->flags);
+ /* Now that PM runtime is enabled again it's safe
+ * to turn the IRQ back on and process any data
+ * that was received during suspend.
+ */
+ ipa_interrupt_irq_enable(ipa);
+
return ret;
}
diff --git a/drivers/net/mdio/mdio-mux-meson-g12a.c b/drivers/net/mdio/mdio-mux-meson-g12a.c
index 4a2e94faf57e..c4542ecf5623 100644
--- a/drivers/net/mdio/mdio-mux-meson-g12a.c
+++ b/drivers/net/mdio/mdio-mux-meson-g12a.c
@@ -4,6 +4,7 @@
*/
#include <linux/bitfield.h>
+#include <linux/delay.h>
#include <linux/clk.h>
#include <linux/clk-provider.h>
#include <linux/device.h>
@@ -150,6 +151,7 @@ static const struct clk_ops g12a_ephy_pll_ops = {
static int g12a_enable_internal_mdio(struct g12a_mdio_mux *priv)
{
+ u32 value;
int ret;
/* Enable the phy clock */
@@ -163,18 +165,25 @@ static int g12a_enable_internal_mdio(struct g12a_mdio_mux *priv)
/* Initialize ephy control */
writel(EPHY_G12A_ID, priv->regs + ETH_PHY_CNTL0);
- writel(FIELD_PREP(PHY_CNTL1_ST_MODE, 3) |
- FIELD_PREP(PHY_CNTL1_ST_PHYADD, EPHY_DFLT_ADD) |
- FIELD_PREP(PHY_CNTL1_MII_MODE, EPHY_MODE_RMII) |
- PHY_CNTL1_CLK_EN |
- PHY_CNTL1_CLKFREQ |
- PHY_CNTL1_PHY_ENB,
- priv->regs + ETH_PHY_CNTL1);
+
+ /* Make sure we get a 0 -> 1 transition on the enable bit */
+ value = FIELD_PREP(PHY_CNTL1_ST_MODE, 3) |
+ FIELD_PREP(PHY_CNTL1_ST_PHYADD, EPHY_DFLT_ADD) |
+ FIELD_PREP(PHY_CNTL1_MII_MODE, EPHY_MODE_RMII) |
+ PHY_CNTL1_CLK_EN |
+ PHY_CNTL1_CLKFREQ;
+ writel(value, priv->regs + ETH_PHY_CNTL1);
writel(PHY_CNTL2_USE_INTERNAL |
PHY_CNTL2_SMI_SRC_MAC |
PHY_CNTL2_RX_CLK_EPHY,
priv->regs + ETH_PHY_CNTL2);
+ value |= PHY_CNTL1_PHY_ENB;
+ writel(value, priv->regs + ETH_PHY_CNTL1);
+
+ /* The phy needs a bit of time to power up */
+ mdelay(10);
+
return 0;
}
diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c
index a6f05e35d91f..b7cb71817780 100644
--- a/drivers/net/phy/dp83822.c
+++ b/drivers/net/phy/dp83822.c
@@ -233,7 +233,8 @@ static int dp83822_config_intr(struct phy_device *phydev)
DP83822_ENERGY_DET_INT_EN |
DP83822_LINK_QUAL_INT_EN);
- if (!dp83822->fx_enabled)
+ /* Private data pointer is NULL on DP83825/26 */
+ if (!dp83822 || !dp83822->fx_enabled)
misr_status |= DP83822_ANEG_COMPLETE_INT_EN |
DP83822_DUP_MODE_CHANGE_INT_EN |
DP83822_SPEED_CHANGED_INT_EN;
@@ -253,7 +254,8 @@ static int dp83822_config_intr(struct phy_device *phydev)
DP83822_PAGE_RX_INT_EN |
DP83822_EEE_ERROR_CHANGE_INT_EN);
- if (!dp83822->fx_enabled)
+ /* Private data pointer is NULL on DP83825/26 */
+ if (!dp83822 || !dp83822->fx_enabled)
misr_status |= DP83822_ANEG_ERR_INT_EN |
DP83822_WOL_PKT_INT_EN;
diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 1cd604cd1fa1..16e021b477f0 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -108,7 +108,12 @@ EXPORT_SYMBOL(mdiobus_unregister_device);
struct phy_device *mdiobus_get_phy(struct mii_bus *bus, int addr)
{
- struct mdio_device *mdiodev = bus->mdio_map[addr];
+ struct mdio_device *mdiodev;
+
+ if (addr < 0 || addr >= ARRAY_SIZE(bus->mdio_map))
+ return NULL;
+
+ mdiodev = bus->mdio_map[addr];
if (!mdiodev)
return NULL;
diff --git a/drivers/net/phy/meson-gxl.c b/drivers/net/phy/meson-gxl.c
index c49062ad72c6..a6015cd03bff 100644
--- a/drivers/net/phy/meson-gxl.c
+++ b/drivers/net/phy/meson-gxl.c
@@ -261,6 +261,8 @@ static struct phy_driver meson_gxl_phy[] = {
.handle_interrupt = meson_gxl_handle_interrupt,
.suspend = genphy_suspend,
.resume = genphy_resume,
+ .read_mmd = genphy_read_mmd_unsupported,
+ .write_mmd = genphy_write_mmd_unsupported,
}, {
PHY_ID_MATCH_EXACT(0x01803301),
.name = "Meson G12A Internal PHY",
@@ -271,6 +273,8 @@ static struct phy_driver meson_gxl_phy[] = {
.handle_interrupt = meson_gxl_handle_interrupt,
.suspend = genphy_suspend,
.resume = genphy_resume,
+ .read_mmd = genphy_read_mmd_unsupported,
+ .write_mmd = genphy_write_mmd_unsupported,
},
};
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 716870a4499c..607aa786c8cb 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1517,7 +1517,7 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
* another mac interface, so we should create a device link between
* phy dev and mac dev.
*/
- if (phydev->mdio.bus->parent && dev->dev.parent != phydev->mdio.bus->parent)
+ if (dev && phydev->mdio.bus->parent && dev->dev.parent != phydev->mdio.bus->parent)
phydev->devlink = device_link_add(dev->dev.parent, &phydev->mdio.dev,
DL_FLAG_PM_RUNTIME | DL_FLAG_STATELESS);
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 09cc65c0da93..4d2519cdb801 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -1812,10 +1812,9 @@ int phylink_fwnode_phy_connect(struct phylink *pl,
ret = phy_attach_direct(pl->netdev, phy_dev, flags,
pl->link_interface);
- if (ret) {
- phy_device_free(phy_dev);
+ phy_device_free(phy_dev);
+ if (ret)
return ret;
- }
ret = phylink_bringup_phy(pl, phy_dev, pl->link_config.interface);
if (ret)
diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c
index 8dcb49ed1f3d..7fd9fe6a602b 100644
--- a/drivers/net/phy/xilinx_gmii2rgmii.c
+++ b/drivers/net/phy/xilinx_gmii2rgmii.c
@@ -105,6 +105,7 @@ static int xgmiitorgmii_probe(struct mdio_device *mdiodev)
if (!priv->phy_dev->drv) {
dev_info(dev, "Attached phy not ready\n");
+ put_device(&priv->phy_dev->mdio.dev);
return -EPROBE_DEFER;
}
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index fcd43d62d86b..d10606f257c4 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1044,7 +1044,6 @@ static int team_port_enter(struct team *team, struct team_port *port)
goto err_port_enter;
}
}
- port->dev->priv_flags |= IFF_NO_ADDRCONF;
return 0;
@@ -1058,7 +1057,6 @@ static void team_port_leave(struct team *team, struct team_port *port)
{
if (team->ops.port_leave)
team->ops.port_leave(team, port);
- port->dev->priv_flags &= ~IFF_NO_ADDRCONF;
dev_put(team->dev);
}
diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c
index 8911cd2ed534..c140edb4b648 100644
--- a/drivers/net/usb/cdc_ether.c
+++ b/drivers/net/usb/cdc_ether.c
@@ -1008,6 +1008,12 @@ static const struct usb_device_id products[] = {
USB_CDC_PROTO_NONE),
.driver_info = (unsigned long)&wwan_info,
}, {
+ /* Cinterion PLS62-W modem by GEMALTO/THALES */
+ USB_DEVICE_AND_INTERFACE_INFO(0x1e2d, 0x005b, USB_CLASS_COMM,
+ USB_CDC_SUBCLASS_ETHERNET,
+ USB_CDC_PROTO_NONE),
+ .driver_info = (unsigned long)&wwan_info,
+}, {
/* Cinterion PLS83/PLS63 modem by GEMALTO/THALES */
USB_DEVICE_AND_INTERFACE_INFO(0x1e2d, 0x0069, USB_CLASS_COMM,
USB_CDC_SUBCLASS_ETHERNET,
diff --git a/drivers/net/usb/kalmia.c b/drivers/net/usb/kalmia.c
index 9f2b70ef39aa..613fc6910f14 100644
--- a/drivers/net/usb/kalmia.c
+++ b/drivers/net/usb/kalmia.c
@@ -65,8 +65,8 @@ kalmia_send_init_packet(struct usbnet *dev, u8 *init_msg, u8 init_msg_len,
init_msg, init_msg_len, &act_len, KALMIA_USB_TIMEOUT);
if (status != 0) {
netdev_err(dev->net,
- "Error sending init packet. Status %i, length %i\n",
- status, act_len);
+ "Error sending init packet. Status %i\n",
+ status);
return status;
}
else if (act_len != init_msg_len) {
@@ -83,8 +83,8 @@ kalmia_send_init_packet(struct usbnet *dev, u8 *init_msg, u8 init_msg_len,
if (status != 0)
netdev_err(dev->net,
- "Error receiving init result. Status %i, length %i\n",
- status, act_len);
+ "Error receiving init result. Status %i\n",
+ status);
else if (act_len != expected_len)
netdev_err(dev->net, "Unexpected init result length: %i\n",
act_len);
diff --git a/drivers/net/usb/plusb.c b/drivers/net/usb/plusb.c
index 2c82fbcaab22..7a2b0094de51 100644
--- a/drivers/net/usb/plusb.c
+++ b/drivers/net/usb/plusb.c
@@ -57,9 +57,7 @@
static inline int
pl_vendor_req(struct usbnet *dev, u8 req, u8 val, u8 index)
{
- return usbnet_read_cmd(dev, req,
- USB_DIR_IN | USB_TYPE_VENDOR |
- USB_RECIP_DEVICE,
+ return usbnet_write_cmd(dev, req, USB_TYPE_VENDOR | USB_RECIP_DEVICE,
val, index, NULL, 0);
}
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index a481a1d831e2..23da1d9dafd1 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -9836,6 +9836,7 @@ static const struct usb_device_id rtl8152_table[] = {
REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07ab),
REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07c6),
REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x0927),
+ REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x0c5e),
REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101),
REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x304f),
REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3054),
diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c
index f79333fe1783..7b3739b29c8f 100644
--- a/drivers/net/usb/rndis_host.c
+++ b/drivers/net/usb/rndis_host.c
@@ -255,7 +255,8 @@ static int rndis_query(struct usbnet *dev, struct usb_interface *intf,
off = le32_to_cpu(u.get_c->offset);
len = le32_to_cpu(u.get_c->len);
- if (unlikely((8 + off + len) > CONTROL_BUFFER_SIZE))
+ if (unlikely((off > CONTROL_BUFFER_SIZE - 8) ||
+ (len > CONTROL_BUFFER_SIZE - 8 - off)))
goto response_error;
if (*reply_len != -1 && len != *reply_len)
diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c
index 5a53e63d33a6..3164451e1010 100644
--- a/drivers/net/usb/sr9700.c
+++ b/drivers/net/usb/sr9700.c
@@ -413,7 +413,7 @@ static int sr9700_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
/* ignore the CRC length */
len = (skb->data[1] | (skb->data[2] << 8)) - 4;
- if (len > ETH_FRAME_LEN || len > skb->len)
+ if (len > ETH_FRAME_LEN || len > skb->len || len < 0)
return 0;
/* the last packet of current skb */
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index ac7c0653695f..dfc7d87fad59 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -974,6 +974,9 @@ static int veth_poll(struct napi_struct *napi, int budget)
xdp_set_return_frame_no_direct();
done = veth_xdp_rcv(rq, budget, &bq, &stats);
+ if (stats.xdp_redirect > 0)
+ xdp_do_flush();
+
if (done < budget && napi_complete_done(napi, done)) {
/* Write rx_notify_masked before reading ptr_ring */
smp_store_mb(rq->rx_notify_masked, false);
@@ -987,8 +990,6 @@ static int veth_poll(struct napi_struct *napi, int budget)
if (stats.xdp_tx > 0)
veth_xdp_flush(rq, &bq);
- if (stats.xdp_redirect > 0)
- xdp_do_flush();
xdp_clear_return_frame_no_direct();
return done;
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 7723b2a49d8e..61e33e4dd0cd 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1677,13 +1677,13 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
received = virtnet_receive(rq, budget, &xdp_xmit);
+ if (xdp_xmit & VIRTIO_XDP_REDIR)
+ xdp_do_flush();
+
/* Out of packets? */
if (received < budget)
virtqueue_napi_complete(napi, rq->vq, received);
- if (xdp_xmit & VIRTIO_XDP_REDIR)
- xdp_do_flush();
-
if (xdp_xmit & VIRTIO_XDP_TX) {
sq = virtnet_xdp_get_sq(vi);
if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
@@ -1877,8 +1877,10 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
*/
if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
netif_stop_subqueue(dev, qnum);
- if (!use_napi &&
- unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
+ if (use_napi) {
+ if (unlikely(!virtqueue_enable_cb_delayed(sq->vq)))
+ virtqueue_napi_schedule(&sq->napi, sq->vq);
+ } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
/* More just got used, free them then recheck. */
free_old_xmit_skbs(sq, false);
if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
@@ -2156,8 +2158,8 @@ static int virtnet_close(struct net_device *dev)
cancel_delayed_work_sync(&vi->refill);
for (i = 0; i < vi->max_queue_pairs; i++) {
- xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
napi_disable(&vi->rq[i].napi);
+ xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
virtnet_napi_tx_disable(&vi->sq[i].napi);
}
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 6f1e560fb15c..682987040ea8 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -1288,6 +1288,10 @@ vmxnet3_rx_csum(struct vmxnet3_adapter *adapter,
(le32_to_cpu(gdesc->dword[3]) &
VMXNET3_RCD_CSUM_OK) == VMXNET3_RCD_CSUM_OK) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
+ if ((le32_to_cpu(gdesc->dword[0]) &
+ (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))) {
+ skb->csum_level = 1;
+ }
WARN_ON_ONCE(!(gdesc->rcd.tcp || gdesc->rcd.udp) &&
!(le32_to_cpu(gdesc->dword[0]) &
(1UL << VMXNET3_RCD_HDR_INNER_SHIFT)));
@@ -1297,6 +1301,10 @@ vmxnet3_rx_csum(struct vmxnet3_adapter *adapter,
} else if (gdesc->rcd.v6 && (le32_to_cpu(gdesc->dword[3]) &
(1 << VMXNET3_RCD_TUC_SHIFT))) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
+ if ((le32_to_cpu(gdesc->dword[0]) &
+ (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))) {
+ skb->csum_level = 1;
+ }
WARN_ON_ONCE(!(gdesc->rcd.tcp || gdesc->rcd.udp) &&
!(le32_to_cpu(gdesc->dword[0]) &
(1UL << VMXNET3_RCD_HDR_INNER_SHIFT)));
@@ -1538,31 +1546,6 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
rxd->len = rbi->len;
}
-#ifdef VMXNET3_RSS
- if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE &&
- (adapter->netdev->features & NETIF_F_RXHASH)) {
- enum pkt_hash_types hash_type;
-
- switch (rcd->rssType) {
- case VMXNET3_RCD_RSS_TYPE_IPV4:
- case VMXNET3_RCD_RSS_TYPE_IPV6:
- hash_type = PKT_HASH_TYPE_L3;
- break;
- case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
- case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
- case VMXNET3_RCD_RSS_TYPE_UDPIPV4:
- case VMXNET3_RCD_RSS_TYPE_UDPIPV6:
- hash_type = PKT_HASH_TYPE_L4;
- break;
- default:
- hash_type = PKT_HASH_TYPE_L3;
- break;
- }
- skb_set_hash(ctx->skb,
- le32_to_cpu(rcd->rssHash),
- hash_type);
- }
-#endif
skb_record_rx_queue(ctx->skb, rq->qid);
skb_put(ctx->skb, rcd->len);
@@ -1645,6 +1628,31 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
u32 mtu = adapter->netdev->mtu;
skb->len += skb->data_len;
+#ifdef VMXNET3_RSS
+ if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE &&
+ (adapter->netdev->features & NETIF_F_RXHASH)) {
+ enum pkt_hash_types hash_type;
+
+ switch (rcd->rssType) {
+ case VMXNET3_RCD_RSS_TYPE_IPV4:
+ case VMXNET3_RCD_RSS_TYPE_IPV6:
+ hash_type = PKT_HASH_TYPE_L3;
+ break;
+ case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
+ case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
+ case VMXNET3_RCD_RSS_TYPE_UDPIPV4:
+ case VMXNET3_RCD_RSS_TYPE_UDPIPV6:
+ hash_type = PKT_HASH_TYPE_L4;
+ break;
+ default:
+ hash_type = PKT_HASH_TYPE_L3;
+ break;
+ }
+ skb_set_hash(skb,
+ le32_to_cpu(rcd->rssHash),
+ hash_type);
+ }
+#endif
vmxnet3_rx_csum(adapter, skb,
(union Vmxnet3_GenericDesc *)rcd);
skb->protocol = eth_type_trans(skb, adapter->netdev);
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 6b5a4d036d15..bdb3a76a352e 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -1385,8 +1385,8 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
/* loopback, multicast & non-ND link-local traffic; do not push through
* packet taps again. Reset pkt_type for upper layers to process skb.
- * For strict packets with a source LLA, determine the dst using the
- * original ifindex.
+ * For non-loopback strict packets, determine the dst using the original
+ * ifindex.
*/
if (skb->pkt_type == PACKET_LOOPBACK || (need_strict && !is_ndisc)) {
skb->dev = vrf_dev;
@@ -1395,7 +1395,7 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
if (skb->pkt_type == PACKET_LOOPBACK)
skb->pkt_type = PACKET_HOST;
- else if (ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)
+ else
vrf_ip6_input_dst(skb, vrf_dev, orig_iif);
goto out;
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index 92224b36787a..b1b179effe2a 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -2917,16 +2917,23 @@ static int vxlan_init(struct net_device *dev)
vxlan_vnigroup_init(vxlan);
dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
+ if (!dev->tstats) {
+ err = -ENOMEM;
+ goto err_vnigroup_uninit;
+ }
err = gro_cells_init(&vxlan->gro_cells, dev);
- if (err) {
- free_percpu(dev->tstats);
- return err;
- }
+ if (err)
+ goto err_free_percpu;
return 0;
+
+err_free_percpu:
+ free_percpu(dev->tstats);
+err_vnigroup_uninit:
+ if (vxlan->cfg.flags & VXLAN_F_VNIFILTER)
+ vxlan_vnigroup_uninit(vxlan);
+ return err;
}
static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan, __be32 vni)
diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c
index 22edea6ca4b8..1c53b5546927 100644
--- a/drivers/net/wan/fsl_ucc_hdlc.c
+++ b/drivers/net/wan/fsl_ucc_hdlc.c
@@ -1243,9 +1243,11 @@ static int ucc_hdlc_probe(struct platform_device *pdev)
free_dev:
free_netdev(dev);
undo_uhdlc_init:
- iounmap(utdm->siram);
+ if (utdm)
+ iounmap(utdm->siram);
unmap_si_regs:
- iounmap(utdm->si_regs);
+ if (utdm)
+ iounmap(utdm->si_regs);
free_utdm:
if (uhdlc_priv->tsa)
kfree(utdm);
diff --git a/drivers/net/wireless/ath/ath9k/htc.h b/drivers/net/wireless/ath/ath9k/htc.h
index 30f0765fb9fd..237f4ec2cffd 100644
--- a/drivers/net/wireless/ath/ath9k/htc.h
+++ b/drivers/net/wireless/ath/ath9k/htc.h
@@ -327,9 +327,9 @@ static inline struct ath9k_htc_tx_ctl *HTC_SKB_CB(struct sk_buff *skb)
}
#ifdef CONFIG_ATH9K_HTC_DEBUGFS
-#define __STAT_SAFE(hif_dev, expr) ((hif_dev)->htc_handle->drv_priv ? (expr) : 0)
-#define CAB_STAT_INC(priv) ((priv)->debug.tx_stats.cab_queued++)
-#define TX_QSTAT_INC(priv, q) ((priv)->debug.tx_stats.queue_stats[q]++)
+#define __STAT_SAFE(hif_dev, expr) do { ((hif_dev)->htc_handle->drv_priv ? (expr) : 0); } while (0)
+#define CAB_STAT_INC(priv) do { ((priv)->debug.tx_stats.cab_queued++); } while (0)
+#define TX_QSTAT_INC(priv, q) do { ((priv)->debug.tx_stats.queue_stats[q]++); } while (0)
#define TX_STAT_INC(hif_dev, c) \
__STAT_SAFE((hif_dev), (hif_dev)->htc_handle->drv_priv->debug.tx_stats.c++)
@@ -378,10 +378,10 @@ void ath9k_htc_get_et_stats(struct ieee80211_hw *hw,
struct ethtool_stats *stats, u64 *data);
#else
-#define TX_STAT_INC(hif_dev, c)
-#define TX_STAT_ADD(hif_dev, c, a)
-#define RX_STAT_INC(hif_dev, c)
-#define RX_STAT_ADD(hif_dev, c, a)
+#define TX_STAT_INC(hif_dev, c) do { } while (0)
+#define TX_STAT_ADD(hif_dev, c, a) do { } while (0)
+#define RX_STAT_INC(hif_dev, c) do { } while (0)
+#define RX_STAT_ADD(hif_dev, c, a) do { } while (0)
#define CAB_STAT_INC(priv)
#define TX_QSTAT_INC(priv, c)
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index bff3128c2f26..b115902eb475 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -7937,6 +7937,9 @@ cfg80211_set_channel(struct wiphy *wiphy, struct net_device *dev,
struct brcmf_cfg80211_info *cfg = wiphy_to_cfg(wiphy);
struct brcmf_if *ifp = netdev_priv(cfg_to_ndev(cfg));
+ if (chan->flags & IEEE80211_CHAN_DISABLED)
+ return -EINVAL;
+
/* set_channel */
chspec = channel_to_chanspec(&cfg->d11inf, chan);
if (chspec != INVCHANSPEC) {
@@ -7961,7 +7964,7 @@ brcmf_cfg80211_dump_survey(struct wiphy *wiphy, struct net_device *ndev,
struct brcmf_if *ifp = netdev_priv(cfg_to_ndev(cfg));
struct brcmf_dump_survey survey = {};
struct ieee80211_supported_band *band;
- struct ieee80211_channel *chan;
+ enum nl80211_band band_id;
struct cca_msrmnt_query req;
u32 noise;
int err;
@@ -7974,26 +7977,25 @@ brcmf_cfg80211_dump_survey(struct wiphy *wiphy, struct net_device *ndev,
return -EBUSY;
}
- band = wiphy->bands[NL80211_BAND_2GHZ];
- if (band && idx >= band->n_channels) {
- idx -= band->n_channels;
- band = NULL;
- }
+ for (band_id = 0; band_id < NUM_NL80211_BANDS; band_id++) {
+ band = wiphy->bands[band_id];
+ if (!band)
+ continue;
+ if (idx >= band->n_channels) {
+ idx -= band->n_channels;
+ continue;
+ }
- if (!band || idx >= band->n_channels) {
- band = wiphy->bands[NL80211_BAND_5GHZ];
- if (idx >= band->n_channels)
- return -ENOENT;
+ info->channel = &band->channels[idx];
+ break;
}
+ if (band_id == NUM_NL80211_BANDS)
+ return -ENOENT;
/* Setting current channel to the requested channel */
- chan = &band->channels[idx];
- err = cfg80211_set_channel(wiphy, ndev, chan, NL80211_CHAN_HT20);
- if (err) {
- info->channel = chan;
- info->filled = 0;
+ info->filled = 0;
+ if (cfg80211_set_channel(wiphy, ndev, info->channel, NL80211_CHAN_HT20))
return 0;
- }
/* Disable mpc */
brcmf_set_mpc(ifp, 0);
@@ -8028,7 +8030,6 @@ brcmf_cfg80211_dump_survey(struct wiphy *wiphy, struct net_device *ndev,
if (err)
goto exit;
- info->channel = chan;
info->noise = noise;
info->time = ACS_MSRMNT_DELAY;
info->time_busy = ACS_MSRMNT_DELAY - survey.idle;
@@ -8040,7 +8041,7 @@ brcmf_cfg80211_dump_survey(struct wiphy *wiphy, struct net_device *ndev,
SURVEY_INFO_TIME_TX;
brcmf_dbg(INFO, "OBSS dump: channel %d: survey duration %d\n",
- ieee80211_frequency_to_channel(chan->center_freq),
+ ieee80211_frequency_to_channel(info->channel->center_freq),
ACS_MSRMNT_DELAY);
brcmf_dbg(INFO, "noise(%d) busy(%llu) rx(%llu) tx(%llu)\n",
info->noise, info->time_busy, info->time_rx, info->time_tx);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c
index a83699de01ec..fdd0c9abc1a1 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c
@@ -79,7 +79,8 @@ void brcmf_of_probe(struct device *dev, enum brcmf_bus_type bus_type,
/* Apple ARM64 platforms have their own idea of board type, passed in
* via the device tree. They also have an antenna SKU parameter
*/
- if (!of_property_read_string(np, "brcm,board-type", &prop))
+ err = of_property_read_string(np, "brcm,board-type", &prop);
+ if (!err)
settings->board_type = prop;
if (!of_property_read_string(np, "apple,antenna-sku", &prop))
@@ -87,7 +88,7 @@ void brcmf_of_probe(struct device *dev, enum brcmf_bus_type bus_type,
/* Set board-type to the first string of the machine compatible prop */
root = of_find_node_by_path("/");
- if (root && !settings->board_type) {
+ if (root && err) {
char *board_type;
const char *tmp;
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
index ae57a9a3ab05..b67f6d0810b6 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
@@ -1228,7 +1228,7 @@ static int brcmf_pcie_init_ringbuffers(struct brcmf_pciedev_info *devinfo)
BRCMF_NROF_H2D_COMMON_MSGRINGS;
max_completionrings = BRCMF_NROF_D2H_COMMON_MSGRINGS;
}
- if (max_flowrings > 256) {
+ if (max_flowrings > 512) {
brcmf_err(bus, "invalid max_flowrings(%d)\n", max_flowrings);
return -EIO;
}
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
index e6d64152c81a..a02e5a67b706 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
@@ -1106,6 +1106,11 @@ int iwl_read_ppag_table(struct iwl_fw_runtime *fwrt, union iwl_ppag_table_cmd *c
int i, j, num_sub_bands;
s8 *gain;
+ /* many firmware images for JF lie about this */
+ if (CSR_HW_RFID_TYPE(fwrt->trans->hw_rf_id) ==
+ CSR_HW_RFID_TYPE(CSR_HW_RF_ID_TYPE_JF))
+ return -EOPNOTSUPP;
+
if (!fw_has_capa(&fwrt->fw->ucode_capa, IWL_UCODE_TLV_CAPA_SET_PPAG)) {
IWL_DEBUG_RADIO(fwrt,
"PPAG capability not supported by FW, command not sent.\n");
diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
index f795548562f5..06161815c180 100644
--- a/drivers/net/wireless/mediatek/mt76/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/dma.c
@@ -206,71 +206,103 @@ mt76_dma_queue_reset(struct mt76_dev *dev, struct mt76_queue *q)
}
static int
+mt76_dma_add_rx_buf(struct mt76_dev *dev, struct mt76_queue *q,
+ struct mt76_queue_buf *buf, void *data)
+{
+ struct mt76_desc *desc = &q->desc[q->head];
+ struct mt76_queue_entry *entry = &q->entry[q->head];
+ struct mt76_txwi_cache *txwi = NULL;
+ u32 buf1 = 0, ctrl;
+ int idx = q->head;
+ int rx_token;
+
+ ctrl = FIELD_PREP(MT_DMA_CTL_SD_LEN0, buf[0].len);
+
+ if ((q->flags & MT_QFLAG_WED) &&
+ FIELD_GET(MT_QFLAG_WED_TYPE, q->flags) == MT76_WED_Q_RX) {
+ txwi = mt76_get_rxwi(dev);
+ if (!txwi)
+ return -ENOMEM;
+
+ rx_token = mt76_rx_token_consume(dev, data, txwi, buf->addr);
+ if (rx_token < 0) {
+ mt76_put_rxwi(dev, txwi);
+ return -ENOMEM;
+ }
+
+ buf1 |= FIELD_PREP(MT_DMA_CTL_TOKEN, rx_token);
+ ctrl |= MT_DMA_CTL_TO_HOST;
+ }
+
+ WRITE_ONCE(desc->buf0, cpu_to_le32(buf->addr));
+ WRITE_ONCE(desc->buf1, cpu_to_le32(buf1));
+ WRITE_ONCE(desc->ctrl, cpu_to_le32(ctrl));
+ WRITE_ONCE(desc->info, 0);
+
+ entry->dma_addr[0] = buf->addr;
+ entry->dma_len[0] = buf->len;
+ entry->txwi = txwi;
+ entry->buf = data;
+ entry->wcid = 0xffff;
+ entry->skip_buf1 = true;
+ q->head = (q->head + 1) % q->ndesc;
+ q->queued++;
+
+ return idx;
+}
+
+static int
mt76_dma_add_buf(struct mt76_dev *dev, struct mt76_queue *q,
struct mt76_queue_buf *buf, int nbufs, u32 info,
struct sk_buff *skb, void *txwi)
{
struct mt76_queue_entry *entry;
struct mt76_desc *desc;
- u32 ctrl;
int i, idx = -1;
+ u32 ctrl, next;
+
+ if (txwi) {
+ q->entry[q->head].txwi = DMA_DUMMY_DATA;
+ q->entry[q->head].skip_buf0 = true;
+ }
for (i = 0; i < nbufs; i += 2, buf += 2) {
u32 buf0 = buf[0].addr, buf1 = 0;
idx = q->head;
- q->head = (q->head + 1) % q->ndesc;
+ next = (q->head + 1) % q->ndesc;
desc = &q->desc[idx];
entry = &q->entry[idx];
- if ((q->flags & MT_QFLAG_WED) &&
- FIELD_GET(MT_QFLAG_WED_TYPE, q->flags) == MT76_WED_Q_RX) {
- struct mt76_txwi_cache *t = txwi;
- int rx_token;
-
- if (!t)
- return -ENOMEM;
-
- rx_token = mt76_rx_token_consume(dev, (void *)skb, t,
- buf[0].addr);
- buf1 |= FIELD_PREP(MT_DMA_CTL_TOKEN, rx_token);
- ctrl = FIELD_PREP(MT_DMA_CTL_SD_LEN0, buf[0].len) |
- MT_DMA_CTL_TO_HOST;
- } else {
- if (txwi) {
- q->entry[q->head].txwi = DMA_DUMMY_DATA;
- q->entry[q->head].skip_buf0 = true;
- }
-
- if (buf[0].skip_unmap)
- entry->skip_buf0 = true;
- entry->skip_buf1 = i == nbufs - 1;
-
- entry->dma_addr[0] = buf[0].addr;
- entry->dma_len[0] = buf[0].len;
-
- ctrl = FIELD_PREP(MT_DMA_CTL_SD_LEN0, buf[0].len);
- if (i < nbufs - 1) {
- entry->dma_addr[1] = buf[1].addr;
- entry->dma_len[1] = buf[1].len;
- buf1 = buf[1].addr;
- ctrl |= FIELD_PREP(MT_DMA_CTL_SD_LEN1, buf[1].len);
- if (buf[1].skip_unmap)
- entry->skip_buf1 = true;
- }
-
- if (i == nbufs - 1)
- ctrl |= MT_DMA_CTL_LAST_SEC0;
- else if (i == nbufs - 2)
- ctrl |= MT_DMA_CTL_LAST_SEC1;
+ if (buf[0].skip_unmap)
+ entry->skip_buf0 = true;
+ entry->skip_buf1 = i == nbufs - 1;
+
+ entry->dma_addr[0] = buf[0].addr;
+ entry->dma_len[0] = buf[0].len;
+
+ ctrl = FIELD_PREP(MT_DMA_CTL_SD_LEN0, buf[0].len);
+ if (i < nbufs - 1) {
+ entry->dma_addr[1] = buf[1].addr;
+ entry->dma_len[1] = buf[1].len;
+ buf1 = buf[1].addr;
+ ctrl |= FIELD_PREP(MT_DMA_CTL_SD_LEN1, buf[1].len);
+ if (buf[1].skip_unmap)
+ entry->skip_buf1 = true;
}
+ if (i == nbufs - 1)
+ ctrl |= MT_DMA_CTL_LAST_SEC0;
+ else if (i == nbufs - 2)
+ ctrl |= MT_DMA_CTL_LAST_SEC1;
+
WRITE_ONCE(desc->buf0, cpu_to_le32(buf0));
WRITE_ONCE(desc->buf1, cpu_to_le32(buf1));
WRITE_ONCE(desc->info, cpu_to_le32(info));
WRITE_ONCE(desc->ctrl, cpu_to_le32(ctrl));
+ q->head = next;
q->queued++;
}
@@ -577,17 +609,9 @@ mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q)
spin_lock_bh(&q->lock);
while (q->queued < q->ndesc - 1) {
- struct mt76_txwi_cache *t = NULL;
struct mt76_queue_buf qbuf;
void *buf = NULL;
- if ((q->flags & MT_QFLAG_WED) &&
- FIELD_GET(MT_QFLAG_WED_TYPE, q->flags) == MT76_WED_Q_RX) {
- t = mt76_get_rxwi(dev);
- if (!t)
- break;
- }
-
buf = page_frag_alloc(rx_page, q->buf_size, GFP_ATOMIC);
if (!buf)
break;
@@ -601,7 +625,12 @@ mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q)
qbuf.addr = addr + offset;
qbuf.len = len - offset;
qbuf.skip_unmap = false;
- mt76_dma_add_buf(dev, q, &qbuf, 1, 0, buf, t);
+ if (mt76_dma_add_rx_buf(dev, q, &qbuf, buf) < 0) {
+ dma_unmap_single(dev->dma_dev, addr, len,
+ DMA_FROM_DEVICE);
+ skb_free_frag(buf);
+ break;
+ }
frames++;
}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
index 0a95c3da241b..8388e2a65853 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
@@ -653,6 +653,13 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct mtk_wed_device *wed, int size)
desc->buf0 = cpu_to_le32(phy_addr);
token = mt76_rx_token_consume(&dev->mt76, ptr, t, phy_addr);
+ if (token < 0) {
+ dma_unmap_single(dev->mt76.dma_dev, phy_addr,
+ wed->wlan.rx_size, DMA_TO_DEVICE);
+ skb_free_frag(ptr);
+ goto unmap;
+ }
+
desc->token |= cpu_to_le32(FIELD_PREP(MT_DMA_CTL_TOKEN,
token));
desc++;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/Kconfig b/drivers/net/wireless/mediatek/mt76/mt7996/Kconfig
index 5c5fc569e6d5..79fb47a73c91 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7996/Kconfig
+++ b/drivers/net/wireless/mediatek/mt76/mt7996/Kconfig
@@ -2,6 +2,7 @@
config MT7996E
tristate "MediaTek MT7996 (PCIe) support"
select MT76_CONNAC_LIB
+ select RELAY
depends on MAC80211
depends on PCI
help
diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c
index 24568b98ed9d..1f309d05380a 100644
--- a/drivers/net/wireless/mediatek/mt76/tx.c
+++ b/drivers/net/wireless/mediatek/mt76/tx.c
@@ -764,11 +764,12 @@ int mt76_rx_token_consume(struct mt76_dev *dev, void *ptr,
spin_lock_bh(&dev->rx_token_lock);
token = idr_alloc(&dev->rx_token, t, 0, dev->rx_token_size,
GFP_ATOMIC);
+ if (token >= 0) {
+ t->ptr = ptr;
+ t->dma_addr = phys;
+ }
spin_unlock_bh(&dev->rx_token_lock);
- t->ptr = ptr;
- t->dma_addr = phys;
-
return token;
}
EXPORT_SYMBOL_GPL(mt76_rx_token_consume);
diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c
index 82a7458e01ae..bf72e5fd39cf 100644
--- a/drivers/net/wireless/rndis_wlan.c
+++ b/drivers/net/wireless/rndis_wlan.c
@@ -696,8 +696,8 @@ static int rndis_query_oid(struct usbnet *dev, u32 oid, void *data, int *len)
struct rndis_query *get;
struct rndis_query_c *get_c;
} u;
- int ret, buflen;
- int resplen, respoffs, copylen;
+ int ret;
+ size_t buflen, resplen, respoffs, copylen;
buflen = *len + sizeof(*u.get);
if (buflen < CONTROL_BUFFER_SIZE)
@@ -732,22 +732,15 @@ static int rndis_query_oid(struct usbnet *dev, u32 oid, void *data, int *len)
if (respoffs > buflen) {
/* Device returned data offset outside buffer, error. */
- netdev_dbg(dev->net, "%s(%s): received invalid "
- "data offset: %d > %d\n", __func__,
- oid_to_string(oid), respoffs, buflen);
+ netdev_dbg(dev->net,
+ "%s(%s): received invalid data offset: %zu > %zu\n",
+ __func__, oid_to_string(oid), respoffs, buflen);
ret = -EINVAL;
goto exit_unlock;
}
- if ((resplen + respoffs) > buflen) {
- /* Device would have returned more data if buffer would
- * have been big enough. Copy just the bits that we got.
- */
- copylen = buflen - respoffs;
- } else {
- copylen = resplen;
- }
+ copylen = min(resplen, buflen - respoffs);
if (copylen > *len)
copylen = *len;
diff --git a/drivers/net/wireless/ti/Makefile b/drivers/net/wireless/ti/Makefile
index 0530dd744275..05ee016594f8 100644
--- a/drivers/net/wireless/ti/Makefile
+++ b/drivers/net/wireless/ti/Makefile
@@ -3,6 +3,3 @@ obj-$(CONFIG_WLCORE) += wlcore/
obj-$(CONFIG_WL12XX) += wl12xx/
obj-$(CONFIG_WL1251) += wl1251/
obj-$(CONFIG_WL18XX) += wl18xx/
-
-# small builtin driver bit
-obj-$(CONFIG_WILINK_PLATFORM_DATA) += wilink_platform_data.o
diff --git a/drivers/net/wwan/t7xx/t7xx_hif_dpmaif.c b/drivers/net/wwan/t7xx/t7xx_hif_dpmaif.c
index 7eff3531b9a5..7ff33c1d6ac7 100644
--- a/drivers/net/wwan/t7xx/t7xx_hif_dpmaif.c
+++ b/drivers/net/wwan/t7xx/t7xx_hif_dpmaif.c
@@ -152,6 +152,15 @@ static irqreturn_t t7xx_dpmaif_isr_handler(int irq, void *data)
}
t7xx_pcie_mac_clear_int(dpmaif_ctrl->t7xx_dev, isr_para->pcie_int);
+
+ return IRQ_WAKE_THREAD;
+}
+
+static irqreturn_t t7xx_dpmaif_isr_thread(int irq, void *data)
+{
+ struct dpmaif_isr_para *isr_para = data;
+ struct dpmaif_ctrl *dpmaif_ctrl = isr_para->dpmaif_ctrl;
+
t7xx_dpmaif_irq_cb(isr_para);
t7xx_pcie_mac_set_int(dpmaif_ctrl->t7xx_dev, isr_para->pcie_int);
return IRQ_HANDLED;
@@ -188,7 +197,7 @@ static void t7xx_dpmaif_register_pcie_irq(struct dpmaif_ctrl *dpmaif_ctrl)
t7xx_pcie_mac_clear_int(t7xx_dev, int_type);
t7xx_dev->intr_handler[int_type] = t7xx_dpmaif_isr_handler;
- t7xx_dev->intr_thread[int_type] = NULL;
+ t7xx_dev->intr_thread[int_type] = t7xx_dpmaif_isr_thread;
t7xx_dev->callback_param[int_type] = isr_para;
t7xx_pcie_mac_clear_int_status(t7xx_dev, int_type);
diff --git a/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c b/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c
index aa2174a10437..f4ff2198b5ef 100644
--- a/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c
+++ b/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c
@@ -840,14 +840,13 @@ int t7xx_dpmaif_napi_rx_poll(struct napi_struct *napi, const int budget)
if (!rxq->que_started) {
atomic_set(&rxq->rx_processing, 0);
+ pm_runtime_put_autosuspend(rxq->dpmaif_ctrl->dev);
dev_err(rxq->dpmaif_ctrl->dev, "Work RXQ: %d has not been started\n", rxq->index);
return work_done;
}
- if (!rxq->sleep_lock_pending) {
- pm_runtime_get_noresume(rxq->dpmaif_ctrl->dev);
+ if (!rxq->sleep_lock_pending)
t7xx_pci_disable_sleep(t7xx_dev);
- }
ret = try_wait_for_completion(&t7xx_dev->sleep_lock_acquire);
if (!ret) {
@@ -876,22 +875,22 @@ int t7xx_dpmaif_napi_rx_poll(struct napi_struct *napi, const int budget)
napi_complete_done(napi, work_done);
t7xx_dpmaif_clr_ip_busy_sts(&rxq->dpmaif_ctrl->hw_info);
t7xx_dpmaif_dlq_unmask_rx_done(&rxq->dpmaif_ctrl->hw_info, rxq->index);
+ t7xx_pci_enable_sleep(rxq->dpmaif_ctrl->t7xx_dev);
+ pm_runtime_mark_last_busy(rxq->dpmaif_ctrl->dev);
+ pm_runtime_put_autosuspend(rxq->dpmaif_ctrl->dev);
+ atomic_set(&rxq->rx_processing, 0);
} else {
t7xx_dpmaif_clr_ip_busy_sts(&rxq->dpmaif_ctrl->hw_info);
}
- t7xx_pci_enable_sleep(rxq->dpmaif_ctrl->t7xx_dev);
- pm_runtime_mark_last_busy(rxq->dpmaif_ctrl->dev);
- pm_runtime_put_noidle(rxq->dpmaif_ctrl->dev);
- atomic_set(&rxq->rx_processing, 0);
-
return work_done;
}
void t7xx_dpmaif_irq_rx_done(struct dpmaif_ctrl *dpmaif_ctrl, const unsigned int que_mask)
{
struct dpmaif_rx_queue *rxq;
- int qno;
+ struct dpmaif_ctrl *ctrl;
+ int qno, ret;
qno = ffs(que_mask) - 1;
if (qno < 0 || qno > DPMAIF_RXQ_NUM - 1) {
@@ -900,6 +899,18 @@ void t7xx_dpmaif_irq_rx_done(struct dpmaif_ctrl *dpmaif_ctrl, const unsigned int
}
rxq = &dpmaif_ctrl->rxq[qno];
+ ctrl = rxq->dpmaif_ctrl;
+ /* We need to make sure that the modem has been resumed before
+ * calling napi. This can't be done inside the polling function
+ * as we could be blocked waiting for device to be resumed,
+ * which can't be done from softirq context the poll function
+ * is running in.
+ */
+ ret = pm_runtime_resume_and_get(ctrl->dev);
+ if (ret < 0 && ret != -EACCES) {
+ dev_err(ctrl->dev, "Failed to resume device: %d\n", ret);
+ return;
+ }
napi_schedule(&rxq->napi);
}
diff --git a/drivers/net/wwan/t7xx/t7xx_netdev.c b/drivers/net/wwan/t7xx/t7xx_netdev.c
index 494a28e386a3..3ef4a8a4f8fd 100644
--- a/drivers/net/wwan/t7xx/t7xx_netdev.c
+++ b/drivers/net/wwan/t7xx/t7xx_netdev.c
@@ -27,6 +27,7 @@
#include <linux/list.h>
#include <linux/netdev_features.h>
#include <linux/netdevice.h>
+#include <linux/pm_runtime.h>
#include <linux/skbuff.h>
#include <linux/types.h>
#include <linux/wwan.h>
@@ -45,12 +46,25 @@
static void t7xx_ccmni_enable_napi(struct t7xx_ccmni_ctrl *ctlb)
{
- int i;
+ struct dpmaif_ctrl *ctrl;
+ int i, ret;
+
+ ctrl = ctlb->hif_ctrl;
if (ctlb->is_napi_en)
return;
for (i = 0; i < RXQ_NUM; i++) {
+ /* The usage count has to be bumped every time before calling
+ * napi_schedule. It will be decresed in the poll routine,
+ * right after napi_complete_done is called.
+ */
+ ret = pm_runtime_resume_and_get(ctrl->dev);
+ if (ret < 0) {
+ dev_err(ctrl->dev, "Failed to resume device: %d\n",
+ ret);
+ return;
+ }
napi_enable(ctlb->napi[i]);
napi_schedule(ctlb->napi[i]);
}
diff --git a/drivers/net/wwan/t7xx/t7xx_pci.c b/drivers/net/wwan/t7xx/t7xx_pci.c
index 871f2a27a398..226fc1703e90 100644
--- a/drivers/net/wwan/t7xx/t7xx_pci.c
+++ b/drivers/net/wwan/t7xx/t7xx_pci.c
@@ -121,6 +121,8 @@ void t7xx_pci_pm_init_late(struct t7xx_pci_dev *t7xx_dev)
iowrite32(T7XX_L1_BIT(0), IREG_BASE(t7xx_dev) + ENABLE_ASPM_LOWPWR);
atomic_set(&t7xx_dev->md_pm_state, MTK_PM_RESUMED);
+ pm_runtime_mark_last_busy(&t7xx_dev->pdev->dev);
+ pm_runtime_allow(&t7xx_dev->pdev->dev);
pm_runtime_put_noidle(&t7xx_dev->pdev->dev);
}
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index c1ba4294f364..001636901dda 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -977,7 +977,7 @@ static int read_xenbus_vif_flags(struct backend_info *be)
return 0;
}
-static int netback_remove(struct xenbus_device *dev)
+static void netback_remove(struct xenbus_device *dev)
{
struct backend_info *be = dev_get_drvdata(&dev->dev);
@@ -992,7 +992,6 @@ static int netback_remove(struct xenbus_device *dev)
kfree(be->hotplug_script);
kfree(be);
dev_set_drvdata(&dev->dev, NULL);
- return 0;
}
/*
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 14aec417fa06..12b074286df9 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -2646,7 +2646,7 @@ static void xennet_bus_close(struct xenbus_device *dev)
} while (!ret);
}
-static int xennet_remove(struct xenbus_device *dev)
+static void xennet_remove(struct xenbus_device *dev)
{
struct netfront_info *info = dev_get_drvdata(&dev->dev);
@@ -2662,8 +2662,6 @@ static int xennet_remove(struct xenbus_device *dev)
rtnl_unlock();
}
xennet_free_netdev(info->netdev);
-
- return 0;
}
static const struct xenbus_device_id netfront_ids[] = {
diff --git a/drivers/nfc/pn533/usb.c b/drivers/nfc/pn533/usb.c
index 6f71ac72012e..ed9c5e2cf3ad 100644
--- a/drivers/nfc/pn533/usb.c
+++ b/drivers/nfc/pn533/usb.c
@@ -153,10 +153,17 @@ static int pn533_usb_send_ack(struct pn533 *dev, gfp_t flags)
return usb_submit_urb(phy->ack_urb, flags);
}
+struct pn533_out_arg {
+ struct pn533_usb_phy *phy;
+ struct completion done;
+};
+
static int pn533_usb_send_frame(struct pn533 *dev,
struct sk_buff *out)
{
struct pn533_usb_phy *phy = dev->phy;
+ struct pn533_out_arg arg;
+ void *cntx;
int rc;
if (phy->priv == NULL)
@@ -168,10 +175,17 @@ static int pn533_usb_send_frame(struct pn533 *dev,
print_hex_dump_debug("PN533 TX: ", DUMP_PREFIX_NONE, 16, 1,
out->data, out->len, false);
+ init_completion(&arg.done);
+ cntx = phy->out_urb->context;
+ phy->out_urb->context = &arg;
+
rc = usb_submit_urb(phy->out_urb, GFP_KERNEL);
if (rc)
return rc;
+ wait_for_completion(&arg.done);
+ phy->out_urb->context = cntx;
+
if (dev->protocol_type == PN533_PROTO_REQ_RESP) {
/* request for response for sent packet directly */
rc = pn533_submit_urb_for_response(phy, GFP_KERNEL);
@@ -408,7 +422,31 @@ static int pn533_acr122_poweron_rdr(struct pn533_usb_phy *phy)
return arg.rc;
}
-static void pn533_send_complete(struct urb *urb)
+static void pn533_out_complete(struct urb *urb)
+{
+ struct pn533_out_arg *arg = urb->context;
+ struct pn533_usb_phy *phy = arg->phy;
+
+ switch (urb->status) {
+ case 0:
+ break; /* success */
+ case -ECONNRESET:
+ case -ENOENT:
+ dev_dbg(&phy->udev->dev,
+ "The urb has been stopped (status %d)\n",
+ urb->status);
+ break;
+ case -ESHUTDOWN:
+ default:
+ nfc_err(&phy->udev->dev,
+ "Urb failure (status %d)\n",
+ urb->status);
+ }
+
+ complete(&arg->done);
+}
+
+static void pn533_ack_complete(struct urb *urb)
{
struct pn533_usb_phy *phy = urb->context;
@@ -496,10 +534,10 @@ static int pn533_usb_probe(struct usb_interface *interface,
usb_fill_bulk_urb(phy->out_urb, phy->udev,
usb_sndbulkpipe(phy->udev, out_endpoint),
- NULL, 0, pn533_send_complete, phy);
+ NULL, 0, pn533_out_complete, phy);
usb_fill_bulk_urb(phy->ack_urb, phy->udev,
usb_sndbulkpipe(phy->udev, out_endpoint),
- NULL, 0, pn533_send_complete, phy);
+ NULL, 0, pn533_ack_complete, phy);
switch (id->driver_info) {
case PN533_DEVICE_STD:
diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig
index 79d93126453d..77b06d54cc62 100644
--- a/drivers/nvdimm/Kconfig
+++ b/drivers/nvdimm/Kconfig
@@ -102,6 +102,25 @@ config NVDIMM_KEYS
depends on ENCRYPTED_KEYS
depends on (LIBNVDIMM=ENCRYPTED_KEYS) || LIBNVDIMM=m
+config NVDIMM_KMSAN
+ bool
+ depends on KMSAN
+ help
+ KMSAN, and other memory debug facilities, increase the size of
+ 'struct page' to contain extra metadata. This collides with
+ the NVDIMM capability to store a potentially
+ larger-than-"System RAM" size 'struct page' array in a
+ reservation of persistent memory rather than limited /
+ precious DRAM. However, that reservation needs to persist for
+ the life of the given NVDIMM namespace. If you are using KMSAN
+ to debug an issue unrelated to NVDIMMs or DAX then say N to this
+ option. Otherwise, say Y but understand that any namespaces
+ (with the page array stored pmem) created with this build of
+ the kernel will permanently reserve and strand excess
+ capacity compared to the CONFIG_KMSAN=n case.
+
+ Select N if unsure.
+
config NVDIMM_TEST_BUILD
tristate "Build the unit test core"
depends on m
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 85ca5b4da3cf..ec5219680092 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -652,7 +652,7 @@ void devm_namespace_disable(struct device *dev,
struct nd_namespace_common *ndns);
#if IS_ENABLED(CONFIG_ND_CLAIM)
/* max struct page size independent of kernel config */
-#define MAX_STRUCT_PAGE_SIZE 128
+#define MAX_STRUCT_PAGE_SIZE 64
int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap);
#else
static inline int nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 61af072ac98f..af7d9301520c 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -13,6 +13,8 @@
#include "pfn.h"
#include "nd.h"
+static const bool page_struct_override = IS_ENABLED(CONFIG_NVDIMM_KMSAN);
+
static void nd_pfn_release(struct device *dev)
{
struct nd_region *nd_region = to_nd_region(dev->parent);
@@ -758,12 +760,6 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
return -ENXIO;
}
- /*
- * Note, we use 64 here for the standard size of struct page,
- * debugging options may cause it to be larger in which case the
- * implementation will limit the pfns advertised through
- * ->direct_access() to those that are included in the memmap.
- */
start = nsio->res.start;
size = resource_size(&nsio->res);
npfns = PHYS_PFN(size - SZ_8K);
@@ -782,20 +778,33 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
}
end_trunc = start + size - ALIGN_DOWN(start + size, align);
if (nd_pfn->mode == PFN_MODE_PMEM) {
+ unsigned long page_map_size = MAX_STRUCT_PAGE_SIZE * npfns;
+
/*
* The altmap should be padded out to the block size used
* when populating the vmemmap. This *should* be equal to
* PMD_SIZE for most architectures.
*
- * Also make sure size of struct page is less than 128. We
- * want to make sure we use large enough size here so that
- * we don't have a dynamic reserve space depending on
- * struct page size. But we also want to make sure we notice
- * when we end up adding new elements to struct page.
+ * Also make sure size of struct page is less than
+ * MAX_STRUCT_PAGE_SIZE. The goal here is compatibility in the
+ * face of production kernel configurations that reduce the
+ * 'struct page' size below MAX_STRUCT_PAGE_SIZE. For debug
+ * kernel configurations that increase the 'struct page' size
+ * above MAX_STRUCT_PAGE_SIZE, the page_struct_override allows
+ * for continuing with the capacity that will be wasted when
+ * reverting to a production kernel configuration. Otherwise,
+ * those configurations are blocked by default.
*/
- BUILD_BUG_ON(sizeof(struct page) > MAX_STRUCT_PAGE_SIZE);
- offset = ALIGN(start + SZ_8K + MAX_STRUCT_PAGE_SIZE * npfns, align)
- - start;
+ if (sizeof(struct page) > MAX_STRUCT_PAGE_SIZE) {
+ if (page_struct_override)
+ page_map_size = sizeof(struct page) * npfns;
+ else {
+ dev_err(&nd_pfn->dev,
+ "Memory debug options prevent using pmem for the page map\n");
+ return -EINVAL;
+ }
+ }
+ offset = ALIGN(start + SZ_8K + page_map_size, align) - start;
} else if (nd_pfn->mode == PFN_MODE_RAM)
offset = ALIGN(start + SZ_8K, align) - start;
else
@@ -818,7 +827,10 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
pfn_sb->version_minor = cpu_to_le16(4);
pfn_sb->end_trunc = cpu_to_le32(end_trunc);
pfn_sb->align = cpu_to_le32(nd_pfn->align);
- pfn_sb->page_struct_size = cpu_to_le16(MAX_STRUCT_PAGE_SIZE);
+ if (sizeof(struct page) > MAX_STRUCT_PAGE_SIZE && page_struct_override)
+ pfn_sb->page_struct_size = cpu_to_le16(sizeof(struct page));
+ else
+ pfn_sb->page_struct_size = cpu_to_le16(MAX_STRUCT_PAGE_SIZE);
pfn_sb->page_size = cpu_to_le32(PAGE_SIZE);
checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb);
pfn_sb->checksum = cpu_to_le64(checksum);
diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c
index e36aeb50b4ed..b317ce6c4ec3 100644
--- a/drivers/nvme/host/apple.c
+++ b/drivers/nvme/host/apple.c
@@ -829,7 +829,23 @@ static void apple_nvme_disable(struct apple_nvme *anv, bool shutdown)
apple_nvme_remove_cq(anv);
}
- nvme_disable_ctrl(&anv->ctrl, shutdown);
+ /*
+ * Always disable the NVMe controller after shutdown.
+ * We need to do this to bring it back up later anyway, and we
+ * can't do it while the firmware is not running (e.g. in the
+ * resume reset path before RTKit is initialized), so for Apple
+ * controllers it makes sense to unconditionally do it here.
+ * Additionally, this sequence of events is reliable, while
+ * others (like disabling after bringing back the firmware on
+ * resume) seem to run into trouble under some circumstances.
+ *
+ * Both U-Boot and m1n1 also use this convention (i.e. an ANS
+ * NVMe controller is handed off with firmware shut down, in an
+ * NVMe disabled state, after a clean shutdown).
+ */
+ if (shutdown)
+ nvme_disable_ctrl(&anv->ctrl, shutdown);
+ nvme_disable_ctrl(&anv->ctrl, false);
}
WRITE_ONCE(anv->ioq.enabled, false);
@@ -985,11 +1001,11 @@ static void apple_nvme_reset_work(struct work_struct *work)
goto out;
}
- if (anv->ctrl.ctrl_config & NVME_CC_ENABLE)
- apple_nvme_disable(anv, false);
-
/* RTKit must be shut down cleanly for the (soft)-reset to work */
if (apple_rtkit_is_running(anv->rtk)) {
+ /* reset the controller if it is enabled */
+ if (anv->ctrl.ctrl_config & NVME_CC_ENABLE)
+ apple_nvme_disable(anv, false);
dev_dbg(anv->dev, "Trying to shut down RTKit before reset.");
ret = apple_rtkit_shutdown(anv->rtk);
if (ret)
@@ -1493,7 +1509,7 @@ static int apple_nvme_probe(struct platform_device *pdev)
}
ret = nvme_init_ctrl(&anv->ctrl, anv->dev, &nvme_ctrl_ops,
- NVME_QUIRK_SKIP_CID_GEN);
+ NVME_QUIRK_SKIP_CID_GEN | NVME_QUIRK_IDENTIFY_CNS);
if (ret) {
dev_err_probe(dev, ret, "Failed to initialize nvme_ctrl");
goto put_dev;
diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c
index 4424f53a8a0a..bdb97496ba2d 100644
--- a/drivers/nvme/host/auth.c
+++ b/drivers/nvme/host/auth.c
@@ -45,6 +45,8 @@ struct nvme_dhchap_queue_context {
int sess_key_len;
};
+static struct workqueue_struct *nvme_auth_wq;
+
#define nvme_auth_flags_from_qid(qid) \
(qid == 0) ? 0 : BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED
#define nvme_auth_queue_from_qid(ctrl, qid) \
@@ -866,7 +868,7 @@ int nvme_auth_negotiate(struct nvme_ctrl *ctrl, int qid)
chap = &ctrl->dhchap_ctxs[qid];
cancel_work_sync(&chap->auth_work);
- queue_work(nvme_wq, &chap->auth_work);
+ queue_work(nvme_auth_wq, &chap->auth_work);
return 0;
}
EXPORT_SYMBOL_GPL(nvme_auth_negotiate);
@@ -1008,10 +1010,15 @@ EXPORT_SYMBOL_GPL(nvme_auth_free);
int __init nvme_init_auth(void)
{
+ nvme_auth_wq = alloc_workqueue("nvme-auth-wq",
+ WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
+ if (!nvme_auth_wq)
+ return -ENOMEM;
+
nvme_chap_buf_cache = kmem_cache_create("nvme-chap-buf-cache",
CHAP_BUF_SIZE, 0, SLAB_HWCACHE_ALIGN, NULL);
if (!nvme_chap_buf_cache)
- return -ENOMEM;
+ goto err_destroy_workqueue;
nvme_chap_buf_pool = mempool_create(16, mempool_alloc_slab,
mempool_free_slab, nvme_chap_buf_cache);
@@ -1021,6 +1028,8 @@ int __init nvme_init_auth(void)
return 0;
err_destroy_chap_buf_cache:
kmem_cache_destroy(nvme_chap_buf_cache);
+err_destroy_workqueue:
+ destroy_workqueue(nvme_auth_wq);
return -ENOMEM;
}
@@ -1028,4 +1037,5 @@ void __exit nvme_exit_auth(void)
{
mempool_destroy(nvme_chap_buf_pool);
kmem_cache_destroy(nvme_chap_buf_cache);
+ destroy_workqueue(nvme_auth_wq);
}
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 7be562a4e1aa..8b6421141162 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1093,7 +1093,7 @@ u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode)
if (ns) {
if (ns->head->effects)
effects = le32_to_cpu(ns->head->effects->iocs[opcode]);
- if (ns->head->ids.csi == NVME_CAP_CSS_NVM)
+ if (ns->head->ids.csi == NVME_CSI_NVM)
effects |= nvme_known_nvm_effects(opcode);
if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC))
dev_warn_once(ctrl->device,
@@ -4921,7 +4921,9 @@ out_cleanup_admin_q:
blk_mq_destroy_queue(ctrl->admin_q);
blk_put_queue(ctrl->admin_q);
out_free_tagset:
- blk_mq_free_tag_set(ctrl->admin_tagset);
+ blk_mq_free_tag_set(set);
+ ctrl->admin_q = NULL;
+ ctrl->fabrics_q = NULL;
return ret;
}
EXPORT_SYMBOL_GPL(nvme_alloc_admin_tag_set);
@@ -4983,6 +4985,7 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
out_free_tag_set:
blk_mq_free_tag_set(set);
+ ctrl->connect_q = NULL;
return ret;
}
EXPORT_SYMBOL_GPL(nvme_alloc_io_tag_set);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 4564f16a0b20..456ee42a6133 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3521,13 +3521,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
nvme_fc_init_queue(ctrl, 0);
- ret = nvme_alloc_admin_tag_set(&ctrl->ctrl, &ctrl->admin_tag_set,
- &nvme_fc_admin_mq_ops,
- struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv,
- ctrl->lport->ops->fcprqst_priv_sz));
- if (ret)
- goto out_free_queues;
-
/*
* Would have been nice to init io queues tag set as well.
* However, we require interaction from the controller
@@ -3537,10 +3530,17 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0);
if (ret)
- goto out_cleanup_tagset;
+ goto out_free_queues;
/* at this point, teardown path changes to ref counting on nvme ctrl */
+ ret = nvme_alloc_admin_tag_set(&ctrl->ctrl, &ctrl->admin_tag_set,
+ &nvme_fc_admin_mq_ops,
+ struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv,
+ ctrl->lport->ops->fcprqst_priv_sz));
+ if (ret)
+ goto fail_ctrl;
+
spin_lock_irqsave(&rport->lock, flags);
list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list);
spin_unlock_irqrestore(&rport->lock, flags);
@@ -3592,8 +3592,6 @@ fail_ctrl:
return ERR_PTR(-EIO);
-out_cleanup_tagset:
- nvme_remove_admin_tag_set(&ctrl->ctrl);
out_free_queues:
kfree(ctrl->queues);
out_free_ida:
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index a8639919237e..06f52db34be9 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -8,8 +8,13 @@
#include <linux/io_uring.h>
#include "nvme.h"
+enum {
+ NVME_IOCTL_VEC = (1 << 0),
+ NVME_IOCTL_PARTITION = (1 << 1),
+};
+
static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
- fmode_t mode)
+ unsigned int flags, fmode_t mode)
{
u32 effects;
@@ -17,6 +22,13 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
return true;
/*
+ * Do not allow unprivileged passthrough on partitions, as that allows an
+ * escape from the containment of the partition.
+ */
+ if (flags & NVME_IOCTL_PARTITION)
+ return false;
+
+ /*
* Do not allow unprivileged processes to send vendor specific or fabrics
* commands as we can't be sure about their effects.
*/
@@ -150,7 +162,7 @@ static struct request *nvme_alloc_user_request(struct request_queue *q,
static int nvme_map_user_request(struct request *req, u64 ubuffer,
unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
u32 meta_seed, void **metap, struct io_uring_cmd *ioucmd,
- bool vec)
+ unsigned int flags)
{
struct request_queue *q = req->q;
struct nvme_ns *ns = q->queuedata;
@@ -163,7 +175,7 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
struct iov_iter iter;
/* fixedbufs is only for non-vectored io */
- if (WARN_ON_ONCE(vec))
+ if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC))
return -EINVAL;
ret = io_uring_cmd_import_fixed(ubuffer, bufflen,
rq_data_dir(req), &iter, ioucmd);
@@ -172,8 +184,8 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL);
} else {
ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer),
- bufflen, GFP_KERNEL, vec, 0, 0,
- rq_data_dir(req));
+ bufflen, GFP_KERNEL, flags & NVME_IOCTL_VEC, 0,
+ 0, rq_data_dir(req));
}
if (ret)
@@ -203,9 +215,9 @@ out:
}
static int nvme_submit_user_cmd(struct request_queue *q,
- struct nvme_command *cmd, u64 ubuffer,
- unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
- u32 meta_seed, u64 *result, unsigned timeout, bool vec)
+ struct nvme_command *cmd, u64 ubuffer, unsigned bufflen,
+ void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
+ u64 *result, unsigned timeout, unsigned int flags)
{
struct nvme_ctrl *ctrl;
struct request *req;
@@ -221,7 +233,7 @@ static int nvme_submit_user_cmd(struct request_queue *q,
req->timeout = timeout;
if (ubuffer && bufflen) {
ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer,
- meta_len, meta_seed, &meta, NULL, vec);
+ meta_len, meta_seed, &meta, NULL, flags);
if (ret)
return ret;
}
@@ -304,10 +316,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
c.rw.apptag = cpu_to_le16(io.apptag);
c.rw.appmask = cpu_to_le16(io.appmask);
- return nvme_submit_user_cmd(ns->queue, &c,
- io.addr, length,
- metadata, meta_len, lower_32_bits(io.slba), NULL, 0,
- false);
+ return nvme_submit_user_cmd(ns->queue, &c, io.addr, length, metadata,
+ meta_len, lower_32_bits(io.slba), NULL, 0, 0);
}
static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl,
@@ -325,7 +335,8 @@ static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl,
}
static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
- struct nvme_passthru_cmd __user *ucmd, fmode_t mode)
+ struct nvme_passthru_cmd __user *ucmd, unsigned int flags,
+ fmode_t mode)
{
struct nvme_passthru_cmd cmd;
struct nvme_command c;
@@ -353,16 +364,15 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
c.common.cdw14 = cpu_to_le32(cmd.cdw14);
c.common.cdw15 = cpu_to_le32(cmd.cdw15);
- if (!nvme_cmd_allowed(ns, &c, mode))
+ if (!nvme_cmd_allowed(ns, &c, 0, mode))
return -EACCES;
if (cmd.timeout_ms)
timeout = msecs_to_jiffies(cmd.timeout_ms);
status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
- cmd.addr, cmd.data_len,
- nvme_to_user_ptr(cmd.metadata), cmd.metadata_len,
- 0, &result, timeout, false);
+ cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata),
+ cmd.metadata_len, 0, &result, timeout, 0);
if (status >= 0) {
if (put_user(result, &ucmd->result))
@@ -373,8 +383,8 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
}
static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
- struct nvme_passthru_cmd64 __user *ucmd, bool vec,
- fmode_t mode)
+ struct nvme_passthru_cmd64 __user *ucmd, unsigned int flags,
+ fmode_t mode)
{
struct nvme_passthru_cmd64 cmd;
struct nvme_command c;
@@ -401,16 +411,15 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
c.common.cdw14 = cpu_to_le32(cmd.cdw14);
c.common.cdw15 = cpu_to_le32(cmd.cdw15);
- if (!nvme_cmd_allowed(ns, &c, mode))
+ if (!nvme_cmd_allowed(ns, &c, flags, mode))
return -EACCES;
if (cmd.timeout_ms)
timeout = msecs_to_jiffies(cmd.timeout_ms);
status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
- cmd.addr, cmd.data_len,
- nvme_to_user_ptr(cmd.metadata), cmd.metadata_len,
- 0, &cmd.result, timeout, vec);
+ cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata),
+ cmd.metadata_len, 0, &cmd.result, timeout, flags);
if (status >= 0) {
if (put_user(cmd.result, &ucmd->result))
@@ -571,7 +580,7 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14));
c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15));
- if (!nvme_cmd_allowed(ns, &c, ioucmd->file->f_mode))
+ if (!nvme_cmd_allowed(ns, &c, 0, ioucmd->file->f_mode))
return -EACCES;
d.metadata = READ_ONCE(cmd->metadata);
@@ -641,9 +650,9 @@ static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd,
{
switch (cmd) {
case NVME_IOCTL_ADMIN_CMD:
- return nvme_user_cmd(ctrl, NULL, argp, mode);
+ return nvme_user_cmd(ctrl, NULL, argp, 0, mode);
case NVME_IOCTL_ADMIN64_CMD:
- return nvme_user_cmd64(ctrl, NULL, argp, false, mode);
+ return nvme_user_cmd64(ctrl, NULL, argp, 0, mode);
default:
return sed_ioctl(ctrl->opal_dev, cmd, argp);
}
@@ -668,14 +677,14 @@ struct nvme_user_io32 {
#endif /* COMPAT_FOR_U64_ALIGNMENT */
static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
- void __user *argp, fmode_t mode)
+ void __user *argp, unsigned int flags, fmode_t mode)
{
switch (cmd) {
case NVME_IOCTL_ID:
force_successful_syscall_return();
return ns->head->ns_id;
case NVME_IOCTL_IO_CMD:
- return nvme_user_cmd(ns->ctrl, ns, argp, mode);
+ return nvme_user_cmd(ns->ctrl, ns, argp, flags, mode);
/*
* struct nvme_user_io can have different padding on some 32-bit ABIs.
* Just accept the compat version as all fields that are used are the
@@ -686,37 +695,40 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
#endif
case NVME_IOCTL_SUBMIT_IO:
return nvme_submit_io(ns, argp);
- case NVME_IOCTL_IO64_CMD:
- return nvme_user_cmd64(ns->ctrl, ns, argp, false, mode);
case NVME_IOCTL_IO64_CMD_VEC:
- return nvme_user_cmd64(ns->ctrl, ns, argp, true, mode);
+ flags |= NVME_IOCTL_VEC;
+ fallthrough;
+ case NVME_IOCTL_IO64_CMD:
+ return nvme_user_cmd64(ns->ctrl, ns, argp, flags, mode);
default:
return -ENOTTY;
}
}
-static int __nvme_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *arg,
- fmode_t mode)
-{
- if (is_ctrl_ioctl(cmd))
- return nvme_ctrl_ioctl(ns->ctrl, cmd, arg, mode);
- return nvme_ns_ioctl(ns, cmd, arg, mode);
-}
-
int nvme_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
struct nvme_ns *ns = bdev->bd_disk->private_data;
+ void __user *argp = (void __user *)arg;
+ unsigned int flags = 0;
- return __nvme_ioctl(ns, cmd, (void __user *)arg, mode);
+ if (bdev_is_partition(bdev))
+ flags |= NVME_IOCTL_PARTITION;
+
+ if (is_ctrl_ioctl(cmd))
+ return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, mode);
+ return nvme_ns_ioctl(ns, cmd, argp, flags, mode);
}
long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
struct nvme_ns *ns =
container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev);
+ void __user *argp = (void __user *)arg;
- return __nvme_ioctl(ns, cmd, (void __user *)arg, file->f_mode);
+ if (is_ctrl_ioctl(cmd))
+ return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, file->f_mode);
+ return nvme_ns_ioctl(ns, cmd, argp, 0, file->f_mode);
}
static int nvme_uring_cmd_checks(unsigned int issue_flags)
@@ -806,6 +818,10 @@ int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode,
void __user *argp = (void __user *)arg;
struct nvme_ns *ns;
int srcu_idx, ret = -EWOULDBLOCK;
+ unsigned int flags = 0;
+
+ if (bdev_is_partition(bdev))
+ flags |= NVME_IOCTL_PARTITION;
srcu_idx = srcu_read_lock(&head->srcu);
ns = nvme_find_path(head);
@@ -821,7 +837,7 @@ int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode,
return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx,
mode);
- ret = nvme_ns_ioctl(ns, cmd, argp, mode);
+ ret = nvme_ns_ioctl(ns, cmd, argp, flags, mode);
out_unlock:
srcu_read_unlock(&head->srcu, srcu_idx);
return ret;
@@ -846,7 +862,7 @@ long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx,
file->f_mode);
- ret = nvme_ns_ioctl(ns, cmd, argp, file->f_mode);
+ ret = nvme_ns_ioctl(ns, cmd, argp, 0, file->f_mode);
out_unlock:
srcu_read_unlock(&head->srcu, srcu_idx);
return ret;
@@ -945,7 +961,7 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp,
kref_get(&ns->kref);
up_read(&ctrl->namespaces_rwsem);
- ret = nvme_user_cmd(ctrl, ns, argp, mode);
+ ret = nvme_user_cmd(ctrl, ns, argp, 0, mode);
nvme_put_ns(ns);
return ret;
@@ -962,9 +978,9 @@ long nvme_dev_ioctl(struct file *file, unsigned int cmd,
switch (cmd) {
case NVME_IOCTL_ADMIN_CMD:
- return nvme_user_cmd(ctrl, NULL, argp, file->f_mode);
+ return nvme_user_cmd(ctrl, NULL, argp, 0, file->f_mode);
case NVME_IOCTL_ADMIN64_CMD:
- return nvme_user_cmd64(ctrl, NULL, argp, false, file->f_mode);
+ return nvme_user_cmd64(ctrl, NULL, argp, 0, file->f_mode);
case NVME_IOCTL_IO_CMD:
return nvme_dev_user_cmd(ctrl, argp, file->f_mode);
case NVME_IOCTL_RESET:
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index c03093b6813c..fc39d01e7b63 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -376,6 +376,8 @@ static void nvme_ns_head_submit_bio(struct bio *bio)
* pool from the original queue to allocate the bvecs from.
*/
bio = bio_split_to_limits(bio);
+ if (!bio)
+ return;
srcu_idx = srcu_read_lock(&head->srcu);
ns = nvme_find_path(head);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index b13baccedb4a..c11e0cfeef0f 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -110,6 +110,7 @@ struct nvme_queue;
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
static void nvme_delete_io_queues(struct nvme_dev *dev);
+static void nvme_update_attrs(struct nvme_dev *dev);
/*
* Represents an NVM Express device. Each nvme_dev is a PCI function.
@@ -1362,7 +1363,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
else
nvme_poll_irqdisable(nvmeq);
- if (blk_mq_request_completed(req)) {
+ if (blk_mq_rq_state(req) != MQ_RQ_IN_FLIGHT) {
dev_warn(dev->ctrl.device,
"I/O %d QID %d timeout, completion polled\n",
req->tag, nvmeq->qid);
@@ -1923,6 +1924,8 @@ static void nvme_map_cmb(struct nvme_dev *dev)
if ((dev->cmbsz & (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS)) ==
(NVME_CMBSZ_WDS | NVME_CMBSZ_RDS))
pci_p2pmem_publish(pdev, true);
+
+ nvme_update_attrs(dev);
}
static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
@@ -2209,6 +2212,11 @@ static const struct attribute_group *nvme_pci_dev_attr_groups[] = {
NULL,
};
+static void nvme_update_attrs(struct nvme_dev *dev)
+{
+ sysfs_update_group(&dev->ctrl.device->kobj, &nvme_pci_dev_attrs_group);
+}
+
/*
* nirqs is the number of interrupts available for write and read
* queues. The core already reserved an interrupt for the admin queue.
@@ -2509,18 +2517,12 @@ static int nvme_pci_enable(struct nvme_dev *dev)
{
int result = -ENOMEM;
struct pci_dev *pdev = to_pci_dev(dev->dev);
- int dma_address_bits = 64;
if (pci_enable_device_mem(pdev))
return result;
pci_set_master(pdev);
- if (dev->ctrl.quirks & NVME_QUIRK_DMA_ADDRESS_BITS_48)
- dma_address_bits = 48;
- if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(dma_address_bits)))
- goto disable;
-
if (readl(dev->bar + NVME_REG_CSTS) == -1) {
result = -ENODEV;
goto disable;
@@ -2533,7 +2535,7 @@ static int nvme_pci_enable(struct nvme_dev *dev)
*/
result = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
if (result < 0)
- return result;
+ goto disable;
dev->ctrl.cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
@@ -2586,8 +2588,13 @@ static int nvme_pci_enable(struct nvme_dev *dev)
pci_enable_pcie_error_reporting(pdev);
pci_save_state(pdev);
- return nvme_pci_configure_admin_queue(dev);
+ result = nvme_pci_configure_admin_queue(dev);
+ if (result)
+ goto free_irq;
+ return result;
+ free_irq:
+ pci_free_irq_vectors(pdev);
disable:
pci_disable_device(pdev);
return result;
@@ -2965,7 +2972,7 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev,
dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node);
if (!dev)
- return NULL;
+ return ERR_PTR(-ENOMEM);
INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work);
mutex_init(&dev->shutdown_lock);
@@ -2993,7 +3000,11 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev,
quirks);
if (ret)
goto out_put_device;
-
+
+ if (dev->ctrl.quirks & NVME_QUIRK_DMA_ADDRESS_BITS_48)
+ dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
+ else
+ dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
dma_set_min_align_mask(&pdev->dev, NVME_CTRL_PAGE_SIZE - 1);
dma_set_max_seg_size(&pdev->dev, 0xffffffff);
@@ -3026,8 +3037,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
int result = -ENOMEM;
dev = nvme_pci_alloc_dev(pdev, id);
- if (!dev)
- return -ENOMEM;
+ if (IS_ERR(dev))
+ return PTR_ERR(dev);
result = nvme_dev_map(dev);
if (result)
@@ -3097,6 +3108,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
nvme_start_ctrl(&dev->ctrl);
nvme_put_ctrl(&dev->ctrl);
+ flush_work(&dev->ctrl.scan_work);
return 0;
out_disable:
@@ -3417,6 +3429,8 @@ static const struct pci_device_id nvme_id_table[] = {
{ PCI_DEVICE(0x10ec, 0x5762), /* ADATA SX6000LNP */
.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN |
NVME_QUIRK_BOGUS_NID, },
+ { PCI_DEVICE(0x10ec, 0x5763), /* ADATA SX6000PNP */
+ .driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1cc1, 0x8201), /* ADATA SX8200PNP 512GB */
.driver_data = NVME_QUIRK_NO_DEEPEST_PS |
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
@@ -3495,7 +3509,8 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_SINGLE_VECTOR |
NVME_QUIRK_128_BYTES_SQES |
NVME_QUIRK_SHARED_TAGS |
- NVME_QUIRK_SKIP_CID_GEN },
+ NVME_QUIRK_SKIP_CID_GEN |
+ NVME_QUIRK_IDENTIFY_CNS },
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
{ 0, }
};
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index ab2627e17bb9..1ab6601fdd5c 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -1685,8 +1685,10 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport,
else {
queue = nvmet_fc_alloc_target_queue(iod->assoc, 0,
be16_to_cpu(rqst->assoc_cmd.sqsize));
- if (!queue)
+ if (!queue) {
ret = VERR_QUEUE_ALLOC_FAIL;
+ nvmet_fc_tgt_a_put(iod->assoc);
+ }
}
}
diff --git a/drivers/nvmem/brcm_nvram.c b/drivers/nvmem/brcm_nvram.c
index 34130449f2d2..39aa27942f28 100644
--- a/drivers/nvmem/brcm_nvram.c
+++ b/drivers/nvmem/brcm_nvram.c
@@ -98,6 +98,9 @@ static int brcm_nvram_parse(struct brcm_nvram *priv)
len = le32_to_cpu(header.len);
data = kzalloc(len, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
memcpy_fromio(data, priv->base, len);
data[len - 1] = '\0';
diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index 321d7d63e068..34ee9d36ee7b 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -770,31 +770,32 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
return ERR_PTR(rval);
}
- if (config->wp_gpio)
- nvmem->wp_gpio = config->wp_gpio;
- else if (!config->ignore_wp)
+ nvmem->id = rval;
+
+ nvmem->dev.type = &nvmem_provider_type;
+ nvmem->dev.bus = &nvmem_bus_type;
+ nvmem->dev.parent = config->dev;
+
+ device_initialize(&nvmem->dev);
+
+ if (!config->ignore_wp)
nvmem->wp_gpio = gpiod_get_optional(config->dev, "wp",
GPIOD_OUT_HIGH);
if (IS_ERR(nvmem->wp_gpio)) {
- ida_free(&nvmem_ida, nvmem->id);
rval = PTR_ERR(nvmem->wp_gpio);
- kfree(nvmem);
- return ERR_PTR(rval);
+ nvmem->wp_gpio = NULL;
+ goto err_put_device;
}
kref_init(&nvmem->refcnt);
INIT_LIST_HEAD(&nvmem->cells);
- nvmem->id = rval;
nvmem->owner = config->owner;
if (!nvmem->owner && config->dev->driver)
nvmem->owner = config->dev->driver->owner;
nvmem->stride = config->stride ?: 1;
nvmem->word_size = config->word_size ?: 1;
nvmem->size = config->size;
- nvmem->dev.type = &nvmem_provider_type;
- nvmem->dev.bus = &nvmem_bus_type;
- nvmem->dev.parent = config->dev;
nvmem->root_only = config->root_only;
nvmem->priv = config->priv;
nvmem->type = config->type;
@@ -822,11 +823,8 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
break;
}
- if (rval) {
- ida_free(&nvmem_ida, nvmem->id);
- kfree(nvmem);
- return ERR_PTR(rval);
- }
+ if (rval)
+ goto err_put_device;
nvmem->read_only = device_property_present(config->dev, "read-only") ||
config->read_only || !nvmem->reg_write;
@@ -835,28 +833,22 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
nvmem->dev.groups = nvmem_dev_groups;
#endif
- dev_dbg(&nvmem->dev, "Registering nvmem device %s\n", config->name);
-
- rval = device_register(&nvmem->dev);
- if (rval)
- goto err_put_device;
-
if (nvmem->nkeepout) {
rval = nvmem_validate_keepouts(nvmem);
if (rval)
- goto err_device_del;
+ goto err_put_device;
}
if (config->compat) {
rval = nvmem_sysfs_setup_compat(nvmem, config);
if (rval)
- goto err_device_del;
+ goto err_put_device;
}
if (config->cells) {
rval = nvmem_add_cells(nvmem, config->cells, config->ncells);
if (rval)
- goto err_teardown_compat;
+ goto err_remove_cells;
}
rval = nvmem_add_cells_from_table(nvmem);
@@ -867,17 +859,20 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
if (rval)
goto err_remove_cells;
+ dev_dbg(&nvmem->dev, "Registering nvmem device %s\n", config->name);
+
+ rval = device_add(&nvmem->dev);
+ if (rval)
+ goto err_remove_cells;
+
blocking_notifier_call_chain(&nvmem_notifier, NVMEM_ADD, nvmem);
return nvmem;
err_remove_cells:
nvmem_device_remove_all_cells(nvmem);
-err_teardown_compat:
if (config->compat)
nvmem_sysfs_remove_compat(nvmem, config);
-err_device_del:
- device_del(&nvmem->dev);
err_put_device:
put_device(&nvmem->dev);
@@ -1242,16 +1237,21 @@ struct nvmem_cell *of_nvmem_cell_get(struct device_node *np, const char *id)
if (!cell_np)
return ERR_PTR(-ENOENT);
- nvmem_np = of_get_next_parent(cell_np);
- if (!nvmem_np)
+ nvmem_np = of_get_parent(cell_np);
+ if (!nvmem_np) {
+ of_node_put(cell_np);
return ERR_PTR(-EINVAL);
+ }
nvmem = __nvmem_device_get(nvmem_np, device_match_of_node);
of_node_put(nvmem_np);
- if (IS_ERR(nvmem))
+ if (IS_ERR(nvmem)) {
+ of_node_put(cell_np);
return ERR_CAST(nvmem);
+ }
cell_entry = nvmem_find_cell_entry_by_node(nvmem, cell_np);
+ of_node_put(cell_np);
if (!cell_entry) {
__nvmem_device_put(nvmem);
return ERR_PTR(-ENOENT);
diff --git a/drivers/nvmem/qcom-spmi-sdam.c b/drivers/nvmem/qcom-spmi-sdam.c
index 4fcb63507ecd..8499892044b7 100644
--- a/drivers/nvmem/qcom-spmi-sdam.c
+++ b/drivers/nvmem/qcom-spmi-sdam.c
@@ -166,6 +166,7 @@ static const struct of_device_id sdam_match_table[] = {
{ .compatible = "qcom,spmi-sdam" },
{},
};
+MODULE_DEVICE_TABLE(of, sdam_match_table);
static struct platform_driver sdam_driver = {
.driver = {
diff --git a/drivers/nvmem/sunxi_sid.c b/drivers/nvmem/sunxi_sid.c
index 5750e1f4bcdb..92dfe4cb10e3 100644
--- a/drivers/nvmem/sunxi_sid.c
+++ b/drivers/nvmem/sunxi_sid.c
@@ -41,8 +41,21 @@ static int sunxi_sid_read(void *context, unsigned int offset,
void *val, size_t bytes)
{
struct sunxi_sid *sid = context;
+ u32 word;
+
+ /* .stride = 4 so offset is guaranteed to be aligned */
+ __ioread32_copy(val, sid->base + sid->value_offset + offset, bytes / 4);
- memcpy_fromio(val, sid->base + sid->value_offset + offset, bytes);
+ val += round_down(bytes, 4);
+ offset += round_down(bytes, 4);
+ bytes = bytes % 4;
+
+ if (!bytes)
+ return 0;
+
+ /* Handle any trailing bytes */
+ word = readl_relaxed(sid->base + sid->value_offset + offset);
+ memcpy(val, &word, bytes);
return 0;
}
diff --git a/drivers/of/address.c b/drivers/of/address.c
index c34ac33b7338..67763e5b8c0e 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -965,8 +965,19 @@ int of_dma_get_range(struct device_node *np, const struct bus_dma_region **map)
}
of_dma_range_parser_init(&parser, node);
- for_each_of_range(&parser, &range)
+ for_each_of_range(&parser, &range) {
+ if (range.cpu_addr == OF_BAD_ADDR) {
+ pr_err("translation of DMA address(%llx) to CPU address failed node(%pOF)\n",
+ range.bus_addr, node);
+ continue;
+ }
num_ranges++;
+ }
+
+ if (!num_ranges) {
+ ret = -EINVAL;
+ goto out;
+ }
r = kcalloc(num_ranges + 1, sizeof(*r), GFP_KERNEL);
if (!r) {
@@ -975,18 +986,16 @@ int of_dma_get_range(struct device_node *np, const struct bus_dma_region **map)
}
/*
- * Record all info in the generic DMA ranges array for struct device.
+ * Record all info in the generic DMA ranges array for struct device,
+ * returning an error if we don't find any parsable ranges.
*/
*map = r;
of_dma_range_parser_init(&parser, node);
for_each_of_range(&parser, &range) {
pr_debug("dma_addr(%llx) cpu_addr(%llx) size(%llx)\n",
range.bus_addr, range.cpu_addr, range.size);
- if (range.cpu_addr == OF_BAD_ADDR) {
- pr_err("translation of DMA address(%llx) to CPU address failed node(%pOF)\n",
- range.bus_addr, node);
+ if (range.cpu_addr == OF_BAD_ADDR)
continue;
- }
r->cpu_start = range.cpu_addr;
r->dma_start = range.bus_addr;
r->size = range.size;
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index b2272bccf85c..d1a68b6d03b3 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -26,7 +26,6 @@
#include <linux/serial_core.h>
#include <linux/sysfs.h>
#include <linux/random.h>
-#include <linux/kmemleak.h>
#include <asm/setup.h> /* for COMMAND_LINE_SIZE */
#include <asm/page.h>
@@ -525,12 +524,9 @@ static int __init __reserved_mem_reserve_reg(unsigned long node,
size = dt_mem_next_cell(dt_root_size_cells, &prop);
if (size &&
- early_init_dt_reserve_memory(base, size, nomap) == 0) {
+ early_init_dt_reserve_memory(base, size, nomap) == 0)
pr_debug("Reserved memory: reserved region for node '%s': base %pa, size %lu MiB\n",
uname, &base, (unsigned long)(size / SZ_1M));
- if (!nomap)
- kmemleak_alloc_phys(base, size, 0);
- }
else
pr_err("Reserved memory: failed to reserve memory for node '%s': base %pa, size %lu MiB\n",
uname, &base, (unsigned long)(size / SZ_1M));
@@ -1099,7 +1095,7 @@ u64 __init dt_mem_next_cell(int s, const __be32 **cellp)
*/
int __init early_init_dt_scan_memory(void)
{
- int node;
+ int node, found_memory = 0;
const void *fdt = initial_boot_params;
fdt_for_each_subnode(node, fdt, 0) {
@@ -1139,6 +1135,8 @@ int __init early_init_dt_scan_memory(void)
early_init_dt_add_memory_arch(base, size);
+ found_memory = 1;
+
if (!hotpluggable)
continue;
@@ -1147,7 +1145,7 @@ int __init early_init_dt_scan_memory(void)
base, base + size);
}
}
- return 0;
+ return found_memory;
}
int __init early_init_dt_scan_chosen(char *cmdline)
@@ -1161,18 +1159,14 @@ int __init early_init_dt_scan_chosen(char *cmdline)
if (node < 0)
node = fdt_path_offset(fdt, "/chosen@0");
if (node < 0)
- return -ENOENT;
+ /* Handle the cmdline config options even if no /chosen node */
+ goto handle_cmdline;
chosen_node_offset = node;
early_init_dt_check_for_initrd(node);
early_init_dt_check_for_elfcorehdr(node);
- /* Retrieve command line */
- p = of_get_flat_dt_prop(node, "bootargs", &l);
- if (p != NULL && l > 0)
- strscpy(cmdline, p, min(l, COMMAND_LINE_SIZE));
-
rng_seed = of_get_flat_dt_prop(node, "rng-seed", &l);
if (rng_seed && l > 0) {
add_bootloader_randomness(rng_seed, l);
@@ -1185,6 +1179,32 @@ int __init early_init_dt_scan_chosen(char *cmdline)
fdt_totalsize(initial_boot_params));
}
+ /* Retrieve command line */
+ p = of_get_flat_dt_prop(node, "bootargs", &l);
+ if (p != NULL && l > 0)
+ strscpy(cmdline, p, min(l, COMMAND_LINE_SIZE));
+
+handle_cmdline:
+ /*
+ * CONFIG_CMDLINE is meant to be a default in case nothing else
+ * managed to set the command line, unless CONFIG_CMDLINE_FORCE
+ * is set in which case we override whatever was found earlier.
+ */
+#ifdef CONFIG_CMDLINE
+#if defined(CONFIG_CMDLINE_EXTEND)
+ strlcat(cmdline, " ", COMMAND_LINE_SIZE);
+ strlcat(cmdline, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
+#elif defined(CONFIG_CMDLINE_FORCE)
+ strscpy(cmdline, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
+#else
+ /* No arguments from boot loader, use kernel's cmdl*/
+ if (!((char *)cmdline)[0])
+ strscpy(cmdline, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
+#endif
+#endif /* CONFIG_CMDLINE */
+
+ pr_debug("Command line is: %s\n", (char *)cmdline);
+
return 0;
}
@@ -1277,26 +1297,6 @@ void __init early_init_dt_scan_nodes(void)
if (rc)
pr_warn("No chosen node found, continuing without\n");
- /*
- * CONFIG_CMDLINE is meant to be a default in case nothing else
- * managed to set the command line, unless CONFIG_CMDLINE_FORCE
- * is set in which case we override whatever was found earlier.
- */
-#ifdef CONFIG_CMDLINE
-#if defined(CONFIG_CMDLINE_EXTEND)
- strlcat(boot_command_line, " ", COMMAND_LINE_SIZE);
- strlcat(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
-#elif defined(CONFIG_CMDLINE_FORCE)
- strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
-#else
- /* No arguments from boot loader, use kernel's cmdl */
- if (!boot_command_line[0])
- strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
-#endif
-#endif /* CONFIG_CMDLINE */
-
- pr_debug("Command line is: %s\n", boot_command_line);
-
/* Setup memory, calling early_init_dt_add_memory_arch */
early_init_dt_scan_memory();
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index 65f3b02a0e4e..f90975e00446 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -48,9 +48,10 @@ static int __init early_init_dt_alloc_reserved_memory_arch(phys_addr_t size,
err = memblock_mark_nomap(base, size);
if (err)
memblock_phys_free(base, size);
- kmemleak_ignore_phys(base);
}
+ kmemleak_ignore_phys(base);
+
return err;
}
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 81c8c227ab6b..b3878a98d27f 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -525,6 +525,7 @@ static int __init of_platform_default_populate_init(void)
if (IS_ENABLED(CONFIG_PPC)) {
struct device_node *boot_display = NULL;
struct platform_device *dev;
+ int display_number = 0;
int ret;
/* Check if we have a MacOS display without a node spec */
@@ -555,16 +556,23 @@ static int __init of_platform_default_populate_init(void)
if (!of_get_property(node, "linux,opened", NULL) ||
!of_get_property(node, "linux,boot-display", NULL))
continue;
- dev = of_platform_device_create(node, "of-display", NULL);
+ dev = of_platform_device_create(node, "of-display.0", NULL);
+ of_node_put(node);
if (WARN_ON(!dev))
return -ENOMEM;
boot_display = node;
+ display_number++;
break;
}
for_each_node_by_type(node, "display") {
+ char buf[14];
+ const char *of_display_format = "of-display.%d";
+
if (!of_get_property(node, "linux,opened", NULL) || node == boot_display)
continue;
- of_platform_device_create(node, "of-display", NULL);
+ ret = snprintf(buf, sizeof(buf), of_display_format, display_number++);
+ if (ret < sizeof(buf))
+ of_platform_device_create(node, buf, NULL);
}
} else {
diff --git a/drivers/parisc/pdc_stable.c b/drivers/parisc/pdc_stable.c
index d6af5726ddf3..2a18f7ba2398 100644
--- a/drivers/parisc/pdc_stable.c
+++ b/drivers/parisc/pdc_stable.c
@@ -274,8 +274,7 @@ pdcspath_hwpath_write(struct pdcspath_entry *entry, const char *buf, size_t coun
/* We'll use a local copy of buf */
count = min_t(size_t, count, sizeof(in)-1);
- strncpy(in, buf, count);
- in[count] = '\0';
+ strscpy(in, buf, count + 1);
/* Let's clean up the target. 0xff is a blank pattern */
memset(&hwpath, 0xff, sizeof(hwpath));
@@ -388,8 +387,7 @@ pdcspath_layer_write(struct pdcspath_entry *entry, const char *buf, size_t count
/* We'll use a local copy of buf */
count = min_t(size_t, count, sizeof(in)-1);
- strncpy(in, buf, count);
- in[count] = '\0';
+ strscpy(in, buf, count + 1);
/* Let's clean up the target. 0 is a blank pattern */
memset(&layers, 0, sizeof(layers));
@@ -756,8 +754,7 @@ static ssize_t pdcs_auto_write(struct kobject *kobj,
/* We'll use a local copy of buf */
count = min_t(size_t, count, sizeof(in)-1);
- strncpy(in, buf, count);
- in[count] = '\0';
+ strscpy(in, buf, count + 1);
/* Current flags are stored in primary boot path entry */
pathentry = &pdcspath_entry_primary;
diff --git a/drivers/pci/controller/dwc/Kconfig b/drivers/pci/controller/dwc/Kconfig
index a0d2713f0e88..99ec91e2a5cf 100644
--- a/drivers/pci/controller/dwc/Kconfig
+++ b/drivers/pci/controller/dwc/Kconfig
@@ -225,7 +225,7 @@ config PCIE_ARTPEC6_EP
config PCIE_BT1
tristate "Baikal-T1 PCIe controller"
depends on MIPS_BAIKAL_T1 || COMPILE_TEST
- depends on PCI_MSI_IRQ_DOMAIN
+ depends on PCI_MSI
select PCIE_DW_HOST
help
Enables support for the PCIe controller in the Baikal-T1 SoC to work
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index fba95486caaf..5641786bd020 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1665,7 +1665,6 @@ int pci_save_state(struct pci_dev *dev)
return i;
pci_save_ltr_state(dev);
- pci_save_aspm_l1ss_state(dev);
pci_save_dpc_state(dev);
pci_save_aer_state(dev);
pci_save_ptm_state(dev);
@@ -1772,7 +1771,6 @@ void pci_restore_state(struct pci_dev *dev)
* LTR itself (in the PCIe capability).
*/
pci_restore_ltr_state(dev);
- pci_restore_aspm_l1ss_state(dev);
pci_restore_pcie_state(dev);
pci_restore_pasid_state(dev);
@@ -3465,11 +3463,6 @@ void pci_allocate_cap_save_buffers(struct pci_dev *dev)
if (error)
pci_err(dev, "unable to allocate suspend buffer for LTR\n");
- error = pci_add_ext_cap_save_buffer(dev, PCI_EXT_CAP_ID_L1SS,
- 2 * sizeof(u32));
- if (error)
- pci_err(dev, "unable to allocate suspend buffer for ASPM-L1SS\n");
-
pci_allocate_vc_save_buffers(dev);
}
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 9ed3b5550043..9049d07d3aae 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -566,14 +566,10 @@ bool pcie_wait_for_link(struct pci_dev *pdev, bool active);
void pcie_aspm_init_link_state(struct pci_dev *pdev);
void pcie_aspm_exit_link_state(struct pci_dev *pdev);
void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
-void pci_save_aspm_l1ss_state(struct pci_dev *dev);
-void pci_restore_aspm_l1ss_state(struct pci_dev *dev);
#else
static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { }
static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { }
static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { }
-static inline void pci_save_aspm_l1ss_state(struct pci_dev *dev) { }
-static inline void pci_restore_aspm_l1ss_state(struct pci_dev *dev) { }
#endif
#ifdef CONFIG_PCIE_ECRC
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index 53a1fa306e1e..4b4184563a92 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -470,31 +470,6 @@ static void pci_clear_and_set_dword(struct pci_dev *pdev, int pos,
pci_write_config_dword(pdev, pos, val);
}
-static void aspm_program_l1ss(struct pci_dev *dev, u32 ctl1, u32 ctl2)
-{
- u16 l1ss = dev->l1ss;
- u32 l1_2_enable;
-
- /*
- * Per PCIe r6.0, sec 5.5.4, T_POWER_ON in PCI_L1SS_CTL2 must be
- * programmed prior to setting the L1.2 enable bits in PCI_L1SS_CTL1.
- */
- pci_write_config_dword(dev, l1ss + PCI_L1SS_CTL2, ctl2);
-
- /*
- * In addition, Common_Mode_Restore_Time and LTR_L1.2_THRESHOLD in
- * PCI_L1SS_CTL1 must be programmed *before* setting the L1.2
- * enable bits, even though they're all in PCI_L1SS_CTL1.
- */
- l1_2_enable = ctl1 & PCI_L1SS_CTL1_L1_2_MASK;
- ctl1 &= ~PCI_L1SS_CTL1_L1_2_MASK;
-
- pci_write_config_dword(dev, l1ss + PCI_L1SS_CTL1, ctl1);
- if (l1_2_enable)
- pci_write_config_dword(dev, l1ss + PCI_L1SS_CTL1,
- ctl1 | l1_2_enable);
-}
-
/* Calculate L1.2 PM substate timing parameters */
static void aspm_calc_l1ss_info(struct pcie_link_state *link,
u32 parent_l1ss_cap, u32 child_l1ss_cap)
@@ -504,6 +479,7 @@ static void aspm_calc_l1ss_info(struct pcie_link_state *link,
u32 t_common_mode, t_power_on, l1_2_threshold, scale, value;
u32 ctl1 = 0, ctl2 = 0;
u32 pctl1, pctl2, cctl1, cctl2;
+ u32 pl1_2_enables, cl1_2_enables;
if (!(link->aspm_support & ASPM_STATE_L1_2_MASK))
return;
@@ -552,21 +528,39 @@ static void aspm_calc_l1ss_info(struct pcie_link_state *link,
ctl2 == pctl2 && ctl2 == cctl2)
return;
- pctl1 &= ~(PCI_L1SS_CTL1_CM_RESTORE_TIME |
- PCI_L1SS_CTL1_LTR_L12_TH_VALUE |
- PCI_L1SS_CTL1_LTR_L12_TH_SCALE);
- pctl1 |= (ctl1 & (PCI_L1SS_CTL1_CM_RESTORE_TIME |
- PCI_L1SS_CTL1_LTR_L12_TH_VALUE |
- PCI_L1SS_CTL1_LTR_L12_TH_SCALE));
- aspm_program_l1ss(parent, pctl1, ctl2);
-
- cctl1 &= ~(PCI_L1SS_CTL1_CM_RESTORE_TIME |
- PCI_L1SS_CTL1_LTR_L12_TH_VALUE |
- PCI_L1SS_CTL1_LTR_L12_TH_SCALE);
- cctl1 |= (ctl1 & (PCI_L1SS_CTL1_CM_RESTORE_TIME |
- PCI_L1SS_CTL1_LTR_L12_TH_VALUE |
- PCI_L1SS_CTL1_LTR_L12_TH_SCALE));
- aspm_program_l1ss(child, cctl1, ctl2);
+ /* Disable L1.2 while updating. See PCIe r5.0, sec 5.5.4, 7.8.3.3 */
+ pl1_2_enables = pctl1 & PCI_L1SS_CTL1_L1_2_MASK;
+ cl1_2_enables = cctl1 & PCI_L1SS_CTL1_L1_2_MASK;
+
+ if (pl1_2_enables || cl1_2_enables) {
+ pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1,
+ PCI_L1SS_CTL1_L1_2_MASK, 0);
+ pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
+ PCI_L1SS_CTL1_L1_2_MASK, 0);
+ }
+
+ /* Program T_POWER_ON times in both ports */
+ pci_write_config_dword(parent, parent->l1ss + PCI_L1SS_CTL2, ctl2);
+ pci_write_config_dword(child, child->l1ss + PCI_L1SS_CTL2, ctl2);
+
+ /* Program Common_Mode_Restore_Time in upstream device */
+ pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
+ PCI_L1SS_CTL1_CM_RESTORE_TIME, ctl1);
+
+ /* Program LTR_L1.2_THRESHOLD time in both ports */
+ pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
+ PCI_L1SS_CTL1_LTR_L12_TH_VALUE |
+ PCI_L1SS_CTL1_LTR_L12_TH_SCALE, ctl1);
+ pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1,
+ PCI_L1SS_CTL1_LTR_L12_TH_VALUE |
+ PCI_L1SS_CTL1_LTR_L12_TH_SCALE, ctl1);
+
+ if (pl1_2_enables || cl1_2_enables) {
+ pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1, 0,
+ pl1_2_enables);
+ pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1, 0,
+ cl1_2_enables);
+ }
}
static void aspm_l1ss_init(struct pcie_link_state *link)
@@ -757,43 +751,6 @@ static void pcie_config_aspm_l1ss(struct pcie_link_state *link, u32 state)
PCI_L1SS_CTL1_L1SS_MASK, val);
}
-void pci_save_aspm_l1ss_state(struct pci_dev *dev)
-{
- struct pci_cap_saved_state *save_state;
- u16 l1ss = dev->l1ss;
- u32 *cap;
-
- if (!l1ss)
- return;
-
- save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_L1SS);
- if (!save_state)
- return;
-
- cap = (u32 *)&save_state->cap.data[0];
- pci_read_config_dword(dev, l1ss + PCI_L1SS_CTL2, cap++);
- pci_read_config_dword(dev, l1ss + PCI_L1SS_CTL1, cap++);
-}
-
-void pci_restore_aspm_l1ss_state(struct pci_dev *dev)
-{
- struct pci_cap_saved_state *save_state;
- u32 *cap, ctl1, ctl2;
- u16 l1ss = dev->l1ss;
-
- if (!l1ss)
- return;
-
- save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_L1SS);
- if (!save_state)
- return;
-
- cap = (u32 *)&save_state->cap.data[0];
- ctl2 = *cap++;
- ctl1 = *cap;
- aspm_program_l1ss(dev, ctl1, ctl2);
-}
-
static void pcie_config_aspm_dev(struct pci_dev *pdev, u32 val)
{
pcie_capability_clear_and_set_word(pdev, PCI_EXP_LNKCTL,
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index 7378e2f3e525..fcd029ca2eb1 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -1055,14 +1055,12 @@ out:
return err;
}
-static int pcifront_xenbus_remove(struct xenbus_device *xdev)
+static void pcifront_xenbus_remove(struct xenbus_device *xdev)
{
struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev);
if (pdev)
free_pdev(pdev);
-
- return 0;
}
static const struct xenbus_device_id xenpci_ids[] = {
diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index b80a9b74662b..1deb61b22bc7 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -1576,7 +1576,6 @@ static int arm_cmn_event_init(struct perf_event *event)
hw->dn++;
continue;
}
- hw->dtcs_used |= arm_cmn_node_to_xp(cmn, dn)->dtc;
hw->num_dns++;
if (bynodeid)
break;
@@ -1589,6 +1588,12 @@ static int arm_cmn_event_init(struct perf_event *event)
nodeid, nid.x, nid.y, nid.port, nid.dev, type);
return -EINVAL;
}
+ /*
+ * Keep assuming non-cycles events count in all DTC domains; turns out
+ * it's hard to make a worthwhile optimisation around this, short of
+ * going all-in with domain-local counter allocation as well.
+ */
+ hw->dtcs_used = (1U << cmn->num_dtcs) - 1;
return arm_cmn_validate_group(cmn, event);
}
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 9b593f985805..40f70f83daba 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -550,13 +550,7 @@ static void armpmu_disable(struct pmu *pmu)
static bool armpmu_filter(struct pmu *pmu, int cpu)
{
struct arm_pmu *armpmu = to_arm_pmu(pmu);
- bool ret;
-
- ret = cpumask_test_cpu(cpu, &armpmu->supported_cpus);
- if (ret && armpmu->filter)
- return armpmu->filter(pmu, cpu);
-
- return ret;
+ return !cpumask_test_cpu(cpu, &armpmu->supported_cpus);
}
static ssize_t cpus_show(struct device *dev,
diff --git a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c
index 7585e8080b77..afc63552ecaf 100644
--- a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c
+++ b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c
@@ -255,7 +255,7 @@ static int imx8_pcie_phy_probe(struct platform_device *pdev)
imx8_phy->perst =
devm_reset_control_get_exclusive(dev, "perst");
if (IS_ERR(imx8_phy->perst))
- dev_err_probe(dev, PTR_ERR(imx8_phy->perst),
+ return dev_err_probe(dev, PTR_ERR(imx8_phy->perst),
"Failed to get PCIE PHY PERST control\n");
}
diff --git a/drivers/phy/phy-can-transceiver.c b/drivers/phy/phy-can-transceiver.c
index 95c6dbb52da7..ce511ad5d369 100644
--- a/drivers/phy/phy-can-transceiver.c
+++ b/drivers/phy/phy-can-transceiver.c
@@ -99,6 +99,7 @@ static int can_transceiver_phy_probe(struct platform_device *pdev)
struct gpio_desc *standby_gpio;
struct gpio_desc *enable_gpio;
u32 max_bitrate = 0;
+ int err;
can_transceiver_phy = devm_kzalloc(dev, sizeof(struct can_transceiver_phy), GFP_KERNEL);
if (!can_transceiver_phy)
@@ -124,8 +125,8 @@ static int can_transceiver_phy_probe(struct platform_device *pdev)
return PTR_ERR(phy);
}
- device_property_read_u32(dev, "max-bitrate", &max_bitrate);
- if (!max_bitrate)
+ err = device_property_read_u32(dev, "max-bitrate", &max_bitrate);
+ if ((err != -EINVAL) && !max_bitrate)
dev_warn(dev, "Invalid value for transceiver max bitrate. Ignoring bitrate limit\n");
phy->attrs.max_link_rate = max_bitrate;
diff --git a/drivers/phy/qualcomm/phy-qcom-usb-hs-28nm.c b/drivers/phy/qualcomm/phy-qcom-usb-hs-28nm.c
index 8807e59a1162..a52a9bf13b75 100644
--- a/drivers/phy/qualcomm/phy-qcom-usb-hs-28nm.c
+++ b/drivers/phy/qualcomm/phy-qcom-usb-hs-28nm.c
@@ -401,26 +401,13 @@ static const struct hsphy_init_seq init_seq_femtophy[] = {
HSPHY_INIT_CFG(0x90, 0x60, 0),
};
-static const struct hsphy_init_seq init_seq_mdm9607[] = {
- HSPHY_INIT_CFG(0x80, 0x44, 0),
- HSPHY_INIT_CFG(0x81, 0x38, 0),
- HSPHY_INIT_CFG(0x82, 0x24, 0),
- HSPHY_INIT_CFG(0x83, 0x13, 0),
-};
-
static const struct hsphy_data hsphy_data_femtophy = {
.init_seq = init_seq_femtophy,
.init_seq_num = ARRAY_SIZE(init_seq_femtophy),
};
-static const struct hsphy_data hsphy_data_mdm9607 = {
- .init_seq = init_seq_mdm9607,
- .init_seq_num = ARRAY_SIZE(init_seq_mdm9607),
-};
-
static const struct of_device_id qcom_snps_hsphy_match[] = {
{ .compatible = "qcom,usb-hs-28nm-femtophy", .data = &hsphy_data_femtophy, },
- { .compatible = "qcom,usb-hs-28nm-mdm9607", .data = &hsphy_data_mdm9607, },
{ },
};
MODULE_DEVICE_TABLE(of, qcom_snps_hsphy_match);
diff --git a/drivers/phy/renesas/r8a779f0-ether-serdes.c b/drivers/phy/renesas/r8a779f0-ether-serdes.c
index ec6594e6dc27..e7588a940d69 100644
--- a/drivers/phy/renesas/r8a779f0-ether-serdes.c
+++ b/drivers/phy/renesas/r8a779f0-ether-serdes.c
@@ -126,7 +126,7 @@ r8a779f0_eth_serdes_chan_setting(struct r8a779f0_eth_serdes_channel *channel)
r8a779f0_eth_serdes_write32(channel->addr, 0x0160, 0x180, 0x0007);
r8a779f0_eth_serdes_write32(channel->addr, 0x01ac, 0x180, 0x0000);
r8a779f0_eth_serdes_write32(channel->addr, 0x00c4, 0x180, 0x0310);
- r8a779f0_eth_serdes_write32(channel->addr, 0x00c8, 0x380, 0x0101);
+ r8a779f0_eth_serdes_write32(channel->addr, 0x00c8, 0x180, 0x0101);
ret = r8a779f0_eth_serdes_reg_wait(channel, 0x00c8, 0x0180, BIT(0), 0);
if (ret)
return ret;
diff --git a/drivers/phy/rockchip/phy-rockchip-inno-usb2.c b/drivers/phy/rockchip/phy-rockchip-inno-usb2.c
index e6ededc51523..a0bc10aa7961 100644
--- a/drivers/phy/rockchip/phy-rockchip-inno-usb2.c
+++ b/drivers/phy/rockchip/phy-rockchip-inno-usb2.c
@@ -485,8 +485,10 @@ static int rockchip_usb2phy_power_on(struct phy *phy)
return ret;
ret = property_enable(base, &rport->port_cfg->phy_sus, false);
- if (ret)
+ if (ret) {
+ clk_disable_unprepare(rphy->clk480m);
return ret;
+ }
/* waiting for the utmi_clk to become stable */
usleep_range(1500, 2000);
diff --git a/drivers/phy/sunplus/phy-sunplus-usb2.c b/drivers/phy/sunplus/phy-sunplus-usb2.c
index e827b79f6d49..56de41091d63 100644
--- a/drivers/phy/sunplus/phy-sunplus-usb2.c
+++ b/drivers/phy/sunplus/phy-sunplus-usb2.c
@@ -254,6 +254,9 @@ static int sp_usb_phy_probe(struct platform_device *pdev)
return PTR_ERR(usbphy->phy_regs);
usbphy->moon4_res_mem = platform_get_resource_byname(pdev, IORESOURCE_MEM, "moon4");
+ if (!usbphy->moon4_res_mem)
+ return -EINVAL;
+
usbphy->moon4_regs = devm_ioremap(&pdev->dev, usbphy->moon4_res_mem->start,
resource_size(usbphy->moon4_res_mem));
if (!usbphy->moon4_regs)
diff --git a/drivers/phy/ti/Kconfig b/drivers/phy/ti/Kconfig
index 15a3bcf32308..b905902d5750 100644
--- a/drivers/phy/ti/Kconfig
+++ b/drivers/phy/ti/Kconfig
@@ -23,7 +23,7 @@ config PHY_DM816X_USB
config PHY_AM654_SERDES
tristate "TI AM654 SERDES support"
- depends on OF && ARCH_K3 || COMPILE_TEST
+ depends on OF && (ARCH_K3 || COMPILE_TEST)
depends on COMMON_CLK
select GENERIC_PHY
select MULTIPLEXER
@@ -35,7 +35,7 @@ config PHY_AM654_SERDES
config PHY_J721E_WIZ
tristate "TI J721E WIZ (SERDES Wrapper) support"
- depends on OF && ARCH_K3 || COMPILE_TEST
+ depends on OF && (ARCH_K3 || COMPILE_TEST)
depends on HAS_IOMEM && OF_ADDRESS
depends on COMMON_CLK
select GENERIC_PHY
diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed.c b/drivers/pinctrl/aspeed/pinctrl-aspeed.c
index 3945612900e6..9c6ee46ac7a0 100644
--- a/drivers/pinctrl/aspeed/pinctrl-aspeed.c
+++ b/drivers/pinctrl/aspeed/pinctrl-aspeed.c
@@ -93,10 +93,19 @@ static int aspeed_sig_expr_enable(struct aspeed_pinmux_data *ctx,
static int aspeed_sig_expr_disable(struct aspeed_pinmux_data *ctx,
const struct aspeed_sig_expr *expr)
{
+ int ret;
+
pr_debug("Disabling signal %s for %s\n", expr->signal,
expr->function);
- return aspeed_sig_expr_set(ctx, expr, false);
+ ret = aspeed_sig_expr_eval(ctx, expr, true);
+ if (ret < 0)
+ return ret;
+
+ if (ret)
+ return aspeed_sig_expr_set(ctx, expr, false);
+
+ return 0;
}
/**
@@ -114,7 +123,7 @@ static int aspeed_disable_sig(struct aspeed_pinmux_data *ctx,
int ret = 0;
if (!exprs)
- return true;
+ return -EINVAL;
while (*exprs && !ret) {
ret = aspeed_sig_expr_disable(ctx, *exprs);
diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c
index cc3aaba24188..e49f271de936 100644
--- a/drivers/pinctrl/intel/pinctrl-intel.c
+++ b/drivers/pinctrl/intel/pinctrl-intel.c
@@ -1709,6 +1709,12 @@ const struct intel_pinctrl_soc_data *intel_pinctrl_get_soc_data(struct platform_
EXPORT_SYMBOL_GPL(intel_pinctrl_get_soc_data);
#ifdef CONFIG_PM_SLEEP
+static bool __intel_gpio_is_direct_irq(u32 value)
+{
+ return (value & PADCFG0_GPIROUTIOXAPIC) && (value & PADCFG0_GPIOTXDIS) &&
+ (__intel_gpio_get_gpio_mode(value) == PADCFG0_PMODE_GPIO);
+}
+
static bool intel_pinctrl_should_save(struct intel_pinctrl *pctrl, unsigned int pin)
{
const struct pin_desc *pd = pin_desc_get(pctrl->pctldev, pin);
@@ -1742,8 +1748,7 @@ static bool intel_pinctrl_should_save(struct intel_pinctrl *pctrl, unsigned int
* See https://bugzilla.kernel.org/show_bug.cgi?id=214749.
*/
value = readl(intel_get_padcfg(pctrl, pin, PADCFG0));
- if ((value & PADCFG0_GPIROUTIOXAPIC) && (value & PADCFG0_GPIOTXDIS) &&
- (__intel_gpio_get_gpio_mode(value) == PADCFG0_PMODE_GPIO))
+ if (__intel_gpio_is_direct_irq(value))
return true;
return false;
@@ -1873,7 +1878,12 @@ int intel_pinctrl_resume_noirq(struct device *dev)
for (i = 0; i < pctrl->soc->npins; i++) {
const struct pinctrl_pin_desc *desc = &pctrl->soc->pins[i];
- if (!intel_pinctrl_should_save(pctrl, desc->number))
+ if (!(intel_pinctrl_should_save(pctrl, desc->number) ||
+ /*
+ * If the firmware mangled the register contents too much,
+ * check the saved value for the Direct IRQ mode.
+ */
+ __intel_gpio_is_direct_irq(pads[i].padcfg0)))
continue;
intel_restore_padcfg(pctrl, desc->number, PADCFG0, pads[i].padcfg0);
diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8195.c b/drivers/pinctrl/mediatek/pinctrl-mt8195.c
index 89557c7ed2ab..09c4dcef9338 100644
--- a/drivers/pinctrl/mediatek/pinctrl-mt8195.c
+++ b/drivers/pinctrl/mediatek/pinctrl-mt8195.c
@@ -659,7 +659,7 @@ static const struct mtk_pin_field_calc mt8195_pin_drv_range[] = {
PIN_FIELD_BASE(10, 10, 4, 0x010, 0x10, 9, 3),
PIN_FIELD_BASE(11, 11, 4, 0x000, 0x10, 24, 3),
PIN_FIELD_BASE(12, 12, 4, 0x010, 0x10, 12, 3),
- PIN_FIELD_BASE(13, 13, 4, 0x010, 0x10, 27, 3),
+ PIN_FIELD_BASE(13, 13, 4, 0x000, 0x10, 27, 3),
PIN_FIELD_BASE(14, 14, 4, 0x010, 0x10, 15, 3),
PIN_FIELD_BASE(15, 15, 4, 0x010, 0x10, 0, 3),
PIN_FIELD_BASE(16, 16, 4, 0x010, 0x10, 18, 3),
@@ -708,7 +708,7 @@ static const struct mtk_pin_field_calc mt8195_pin_drv_range[] = {
PIN_FIELD_BASE(78, 78, 3, 0x000, 0x10, 15, 3),
PIN_FIELD_BASE(79, 79, 3, 0x000, 0x10, 18, 3),
PIN_FIELD_BASE(80, 80, 3, 0x000, 0x10, 21, 3),
- PIN_FIELD_BASE(81, 81, 3, 0x000, 0x10, 28, 3),
+ PIN_FIELD_BASE(81, 81, 3, 0x000, 0x10, 24, 3),
PIN_FIELD_BASE(82, 82, 3, 0x000, 0x10, 27, 3),
PIN_FIELD_BASE(83, 83, 3, 0x010, 0x10, 0, 3),
PIN_FIELD_BASE(84, 84, 3, 0x010, 0x10, 3, 3),
diff --git a/drivers/pinctrl/nomadik/pinctrl-ab8500.c b/drivers/pinctrl/nomadik/pinctrl-ab8500.c
index 3106a21cd277..d7b244df058f 100644
--- a/drivers/pinctrl/nomadik/pinctrl-ab8500.c
+++ b/drivers/pinctrl/nomadik/pinctrl-ab8500.c
@@ -6,9 +6,10 @@
*/
#include <linux/kernel.h>
-#include <linux/gpio/driver.h>
#include <linux/pinctrl/pinctrl.h>
+
#include <linux/mfd/abx500/ab8500.h>
+
#include "pinctrl-abx500.h"
/* All the pins that can be used for GPIO and some other functions */
diff --git a/drivers/pinctrl/nomadik/pinctrl-ab8505.c b/drivers/pinctrl/nomadik/pinctrl-ab8505.c
index b93af1fb37f0..45aa958b573e 100644
--- a/drivers/pinctrl/nomadik/pinctrl-ab8505.c
+++ b/drivers/pinctrl/nomadik/pinctrl-ab8505.c
@@ -6,9 +6,10 @@
*/
#include <linux/kernel.h>
-#include <linux/gpio/driver.h>
#include <linux/pinctrl/pinctrl.h>
+
#include <linux/mfd/abx500/ab8500.h>
+
#include "pinctrl-abx500.h"
/* All the pins that can be used for GPIO and some other functions */
diff --git a/drivers/pinctrl/nomadik/pinctrl-abx500.c b/drivers/pinctrl/nomadik/pinctrl-abx500.c
index 7aa534576a45..28c3403df1b0 100644
--- a/drivers/pinctrl/nomadik/pinctrl-abx500.c
+++ b/drivers/pinctrl/nomadik/pinctrl-abx500.c
@@ -6,33 +6,37 @@
*
* Driver allows to use AxB5xx unused pins to be used as GPIO
*/
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/init.h>
+#include <linux/bitops.h>
#include <linux/err.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/platform_device.h>
#include <linux/gpio/driver.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
-#include <linux/interrupt.h>
-#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
#include <linux/mfd/abx500.h>
#include <linux/mfd/abx500/ab8500.h>
-#include <linux/pinctrl/pinctrl.h>
+
#include <linux/pinctrl/consumer.h>
-#include <linux/pinctrl/pinmux.h>
-#include <linux/pinctrl/pinconf.h>
-#include <linux/pinctrl/pinconf-generic.h>
#include <linux/pinctrl/machine.h>
+#include <linux/pinctrl/pinconf-generic.h>
+#include <linux/pinctrl/pinconf.h>
+#include <linux/pinctrl/pinctrl.h>
+#include <linux/pinctrl/pinmux.h>
-#include "pinctrl-abx500.h"
#include "../core.h"
#include "../pinconf.h"
#include "../pinctrl-utils.h"
+#include "pinctrl-abx500.h"
+
/*
* GPIO registers offset
* Bank: 0x10
diff --git a/drivers/pinctrl/nomadik/pinctrl-abx500.h b/drivers/pinctrl/nomadik/pinctrl-abx500.h
index 90bb12fe8073..d67522084675 100644
--- a/drivers/pinctrl/nomadik/pinctrl-abx500.h
+++ b/drivers/pinctrl/nomadik/pinctrl-abx500.h
@@ -2,6 +2,10 @@
#ifndef PINCTRL_PINCTRL_ABx500_H
#define PINCTRL_PINCTRL_ABx500_H
+#include <linux/types.h>
+
+struct pinctrl_pin_desc;
+
/* Package definitions */
#define PINCTRL_AB8500 0
#define PINCTRL_AB8505 1
diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik-db8500.c b/drivers/pinctrl/nomadik/pinctrl-nomadik-db8500.c
index 758d21f0a850..490e0959e8be 100644
--- a/drivers/pinctrl/nomadik/pinctrl-nomadik-db8500.c
+++ b/drivers/pinctrl/nomadik/pinctrl-nomadik-db8500.c
@@ -1,6 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
+#include <linux/types.h>
+
#include <linux/pinctrl/pinctrl.h>
+
#include "pinctrl-nomadik.h"
/* All the pins that can be used for GPIO and some other functions */
diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik-stn8815.c b/drivers/pinctrl/nomadik/pinctrl-nomadik-stn8815.c
index c0d7c86d0939..1552222ac68e 100644
--- a/drivers/pinctrl/nomadik/pinctrl-nomadik-stn8815.c
+++ b/drivers/pinctrl/nomadik/pinctrl-nomadik-stn8815.c
@@ -1,6 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
+#include <linux/types.h>
+
#include <linux/pinctrl/pinctrl.h>
+
#include "pinctrl-nomadik.h"
/* All the pins that can be used for GPIO and some other functions */
diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik.c b/drivers/pinctrl/nomadik/pinctrl-nomadik.c
index f7d02513d8cc..86a638077a69 100644
--- a/drivers/pinctrl/nomadik/pinctrl-nomadik.c
+++ b/drivers/pinctrl/nomadik/pinctrl-nomadik.c
@@ -7,30 +7,34 @@
* Rewritten based on work by Prafulla WADASKAR <prafulla.wadaskar@st.com>
* Copyright (C) 2011-2013 Linus Walleij <linus.walleij@linaro.org>
*/
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/device.h>
-#include <linux/platform_device.h>
-#include <linux/io.h>
+#include <linux/bitops.h>
#include <linux/clk.h>
+#include <linux/device.h>
#include <linux/err.h>
#include <linux/gpio/driver.h>
-#include <linux/spinlock.h>
+#include <linux/init.h>
#include <linux/interrupt.h>
-#include <linux/slab.h>
-#include <linux/of_device.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
#include <linux/of_address.h>
-#include <linux/bitops.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+/* Since we request GPIOs from ourself */
+#include <linux/pinctrl/consumer.h>
#include <linux/pinctrl/machine.h>
+#include <linux/pinctrl/pinconf.h>
#include <linux/pinctrl/pinctrl.h>
#include <linux/pinctrl/pinmux.h>
-#include <linux/pinctrl/pinconf.h>
-/* Since we request GPIOs from ourself */
-#include <linux/pinctrl/consumer.h>
-#include "pinctrl-nomadik.h"
+
#include "../core.h"
#include "../pinctrl-utils.h"
+#include "pinctrl-nomadik.h"
+
/*
* The GPIO module in the Nomadik family of Systems-on-Chip is an
* AMBA device, managing 32 pins and alternate functions. The logic block
@@ -907,8 +911,6 @@ static int nmk_gpio_get_mode(struct nmk_gpio_chip *nmk_chip, int offset)
return (afunc ? NMK_GPIO_ALT_A : 0) | (bfunc ? NMK_GPIO_ALT_B : 0);
}
-#include <linux/seq_file.h>
-
static void nmk_gpio_dbg_show_one(struct seq_file *s,
struct pinctrl_dev *pctldev, struct gpio_chip *chip,
unsigned offset, unsigned gpio)
diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik.h b/drivers/pinctrl/nomadik/pinctrl-nomadik.h
index 84e297757335..1ef2559bc571 100644
--- a/drivers/pinctrl/nomadik/pinctrl-nomadik.h
+++ b/drivers/pinctrl/nomadik/pinctrl-nomadik.h
@@ -2,6 +2,11 @@
#ifndef PINCTRL_PINCTRL_NOMADIK_H
#define PINCTRL_PINCTRL_NOMADIK_H
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+#include <linux/pinctrl/pinctrl.h>
+
/* Package definitions */
#define PINCTRL_NMK_STN8815 0
#define PINCTRL_NMK_DB8500 1
diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c
index 9bc6e3922e78..32c3edaf9038 100644
--- a/drivers/pinctrl/pinctrl-amd.c
+++ b/drivers/pinctrl/pinctrl-amd.c
@@ -365,6 +365,7 @@ static void amd_gpio_dbg_show(struct seq_file *s, struct gpio_chip *gc)
} else {
debounce_enable = " ∅";
+ time = 0;
}
snprintf(debounce_value, sizeof(debounce_value), "%u", time * unit);
seq_printf(s, "debounce %s (🕑 %sus)| ", debounce_enable, debounce_value);
diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c
index da974ff2d75d..5eeac92f610a 100644
--- a/drivers/pinctrl/pinctrl-rockchip.c
+++ b/drivers/pinctrl/pinctrl-rockchip.c
@@ -926,19 +926,19 @@ static struct rockchip_mux_route_data rk3568_mux_route_data[] = {
RK_MUXROUTE_PMU(0, RK_PB5, 4, 0x0110, WRITE_MASK_VAL(3, 2, 1)), /* PWM1 IO mux M1 */
RK_MUXROUTE_PMU(0, RK_PC1, 1, 0x0110, WRITE_MASK_VAL(5, 4, 0)), /* PWM2 IO mux M0 */
RK_MUXROUTE_PMU(0, RK_PB6, 4, 0x0110, WRITE_MASK_VAL(5, 4, 1)), /* PWM2 IO mux M1 */
- RK_MUXROUTE_PMU(0, RK_PB3, 2, 0x0300, WRITE_MASK_VAL(0, 0, 0)), /* CAN0 IO mux M0 */
+ RK_MUXROUTE_GRF(0, RK_PB3, 2, 0x0300, WRITE_MASK_VAL(0, 0, 0)), /* CAN0 IO mux M0 */
RK_MUXROUTE_GRF(2, RK_PA1, 4, 0x0300, WRITE_MASK_VAL(0, 0, 1)), /* CAN0 IO mux M1 */
RK_MUXROUTE_GRF(1, RK_PA1, 3, 0x0300, WRITE_MASK_VAL(2, 2, 0)), /* CAN1 IO mux M0 */
RK_MUXROUTE_GRF(4, RK_PC3, 3, 0x0300, WRITE_MASK_VAL(2, 2, 1)), /* CAN1 IO mux M1 */
RK_MUXROUTE_GRF(4, RK_PB5, 3, 0x0300, WRITE_MASK_VAL(4, 4, 0)), /* CAN2 IO mux M0 */
RK_MUXROUTE_GRF(2, RK_PB2, 4, 0x0300, WRITE_MASK_VAL(4, 4, 1)), /* CAN2 IO mux M1 */
RK_MUXROUTE_GRF(4, RK_PC4, 1, 0x0300, WRITE_MASK_VAL(6, 6, 0)), /* HPDIN IO mux M0 */
- RK_MUXROUTE_PMU(0, RK_PC2, 2, 0x0300, WRITE_MASK_VAL(6, 6, 1)), /* HPDIN IO mux M1 */
+ RK_MUXROUTE_GRF(0, RK_PC2, 2, 0x0300, WRITE_MASK_VAL(6, 6, 1)), /* HPDIN IO mux M1 */
RK_MUXROUTE_GRF(3, RK_PB1, 3, 0x0300, WRITE_MASK_VAL(8, 8, 0)), /* GMAC1 IO mux M0 */
RK_MUXROUTE_GRF(4, RK_PA7, 3, 0x0300, WRITE_MASK_VAL(8, 8, 1)), /* GMAC1 IO mux M1 */
RK_MUXROUTE_GRF(4, RK_PD1, 1, 0x0300, WRITE_MASK_VAL(10, 10, 0)), /* HDMITX IO mux M0 */
- RK_MUXROUTE_PMU(0, RK_PC7, 1, 0x0300, WRITE_MASK_VAL(10, 10, 1)), /* HDMITX IO mux M1 */
- RK_MUXROUTE_PMU(0, RK_PB6, 1, 0x0300, WRITE_MASK_VAL(14, 14, 0)), /* I2C2 IO mux M0 */
+ RK_MUXROUTE_GRF(0, RK_PC7, 1, 0x0300, WRITE_MASK_VAL(10, 10, 1)), /* HDMITX IO mux M1 */
+ RK_MUXROUTE_GRF(0, RK_PB6, 1, 0x0300, WRITE_MASK_VAL(14, 14, 0)), /* I2C2 IO mux M0 */
RK_MUXROUTE_GRF(4, RK_PB4, 1, 0x0300, WRITE_MASK_VAL(14, 14, 1)), /* I2C2 IO mux M1 */
RK_MUXROUTE_GRF(1, RK_PA0, 1, 0x0304, WRITE_MASK_VAL(0, 0, 0)), /* I2C3 IO mux M0 */
RK_MUXROUTE_GRF(3, RK_PB6, 4, 0x0304, WRITE_MASK_VAL(0, 0, 1)), /* I2C3 IO mux M1 */
@@ -964,7 +964,7 @@ static struct rockchip_mux_route_data rk3568_mux_route_data[] = {
RK_MUXROUTE_GRF(4, RK_PC3, 1, 0x0308, WRITE_MASK_VAL(12, 12, 1)), /* PWM15 IO mux M1 */
RK_MUXROUTE_GRF(3, RK_PD2, 3, 0x0308, WRITE_MASK_VAL(14, 14, 0)), /* SDMMC2 IO mux M0 */
RK_MUXROUTE_GRF(3, RK_PA5, 5, 0x0308, WRITE_MASK_VAL(14, 14, 1)), /* SDMMC2 IO mux M1 */
- RK_MUXROUTE_PMU(0, RK_PB5, 2, 0x030c, WRITE_MASK_VAL(0, 0, 0)), /* SPI0 IO mux M0 */
+ RK_MUXROUTE_GRF(0, RK_PB5, 2, 0x030c, WRITE_MASK_VAL(0, 0, 0)), /* SPI0 IO mux M0 */
RK_MUXROUTE_GRF(2, RK_PD3, 3, 0x030c, WRITE_MASK_VAL(0, 0, 1)), /* SPI0 IO mux M1 */
RK_MUXROUTE_GRF(2, RK_PB5, 3, 0x030c, WRITE_MASK_VAL(2, 2, 0)), /* SPI1 IO mux M0 */
RK_MUXROUTE_GRF(3, RK_PC3, 3, 0x030c, WRITE_MASK_VAL(2, 2, 1)), /* SPI1 IO mux M1 */
@@ -973,8 +973,8 @@ static struct rockchip_mux_route_data rk3568_mux_route_data[] = {
RK_MUXROUTE_GRF(4, RK_PB3, 4, 0x030c, WRITE_MASK_VAL(6, 6, 0)), /* SPI3 IO mux M0 */
RK_MUXROUTE_GRF(4, RK_PC2, 2, 0x030c, WRITE_MASK_VAL(6, 6, 1)), /* SPI3 IO mux M1 */
RK_MUXROUTE_GRF(2, RK_PB4, 2, 0x030c, WRITE_MASK_VAL(8, 8, 0)), /* UART1 IO mux M0 */
- RK_MUXROUTE_PMU(0, RK_PD1, 1, 0x030c, WRITE_MASK_VAL(8, 8, 1)), /* UART1 IO mux M1 */
- RK_MUXROUTE_PMU(0, RK_PD1, 1, 0x030c, WRITE_MASK_VAL(10, 10, 0)), /* UART2 IO mux M0 */
+ RK_MUXROUTE_GRF(3, RK_PD6, 4, 0x030c, WRITE_MASK_VAL(8, 8, 1)), /* UART1 IO mux M1 */
+ RK_MUXROUTE_GRF(0, RK_PD1, 1, 0x030c, WRITE_MASK_VAL(10, 10, 0)), /* UART2 IO mux M0 */
RK_MUXROUTE_GRF(1, RK_PD5, 2, 0x030c, WRITE_MASK_VAL(10, 10, 1)), /* UART2 IO mux M1 */
RK_MUXROUTE_GRF(1, RK_PA1, 2, 0x030c, WRITE_MASK_VAL(12, 12, 0)), /* UART3 IO mux M0 */
RK_MUXROUTE_GRF(3, RK_PB7, 4, 0x030c, WRITE_MASK_VAL(12, 12, 1)), /* UART3 IO mux M1 */
@@ -1004,13 +1004,13 @@ static struct rockchip_mux_route_data rk3568_mux_route_data[] = {
RK_MUXROUTE_GRF(3, RK_PD6, 5, 0x0314, WRITE_MASK_VAL(1, 0, 1)), /* PDM IO mux M1 */
RK_MUXROUTE_GRF(4, RK_PA0, 4, 0x0314, WRITE_MASK_VAL(1, 0, 1)), /* PDM IO mux M1 */
RK_MUXROUTE_GRF(3, RK_PC4, 5, 0x0314, WRITE_MASK_VAL(1, 0, 2)), /* PDM IO mux M2 */
- RK_MUXROUTE_PMU(0, RK_PA5, 3, 0x0314, WRITE_MASK_VAL(3, 2, 0)), /* PCIE20 IO mux M0 */
+ RK_MUXROUTE_GRF(0, RK_PA5, 3, 0x0314, WRITE_MASK_VAL(3, 2, 0)), /* PCIE20 IO mux M0 */
RK_MUXROUTE_GRF(2, RK_PD0, 4, 0x0314, WRITE_MASK_VAL(3, 2, 1)), /* PCIE20 IO mux M1 */
RK_MUXROUTE_GRF(1, RK_PB0, 4, 0x0314, WRITE_MASK_VAL(3, 2, 2)), /* PCIE20 IO mux M2 */
- RK_MUXROUTE_PMU(0, RK_PA4, 3, 0x0314, WRITE_MASK_VAL(5, 4, 0)), /* PCIE30X1 IO mux M0 */
+ RK_MUXROUTE_GRF(0, RK_PA4, 3, 0x0314, WRITE_MASK_VAL(5, 4, 0)), /* PCIE30X1 IO mux M0 */
RK_MUXROUTE_GRF(2, RK_PD2, 4, 0x0314, WRITE_MASK_VAL(5, 4, 1)), /* PCIE30X1 IO mux M1 */
RK_MUXROUTE_GRF(1, RK_PA5, 4, 0x0314, WRITE_MASK_VAL(5, 4, 2)), /* PCIE30X1 IO mux M2 */
- RK_MUXROUTE_PMU(0, RK_PA6, 2, 0x0314, WRITE_MASK_VAL(7, 6, 0)), /* PCIE30X2 IO mux M0 */
+ RK_MUXROUTE_GRF(0, RK_PA6, 2, 0x0314, WRITE_MASK_VAL(7, 6, 0)), /* PCIE30X2 IO mux M0 */
RK_MUXROUTE_GRF(2, RK_PD4, 4, 0x0314, WRITE_MASK_VAL(7, 6, 1)), /* PCIE30X2 IO mux M1 */
RK_MUXROUTE_GRF(4, RK_PC2, 4, 0x0314, WRITE_MASK_VAL(7, 6, 2)), /* PCIE30X2 IO mux M2 */
};
@@ -2436,10 +2436,19 @@ static int rockchip_get_pull(struct rockchip_pin_bank *bank, int pin_num)
case RK3308:
case RK3368:
case RK3399:
+ case RK3568:
case RK3588:
pull_type = bank->pull_type[pin_num / 8];
data >>= bit;
data &= (1 << RK3188_PULL_BITS_PER_PIN) - 1;
+ /*
+ * In the TRM, pull-up being 1 for everything except the GPIO0_D3-D6,
+ * where that pull up value becomes 3.
+ */
+ if (ctrl->type == RK3568 && bank->bank_num == 0 && pin_num >= 27 && pin_num <= 30) {
+ if (data == 3)
+ data = 1;
+ }
return rockchip_pull_list[pull_type][data];
default:
@@ -2497,7 +2506,7 @@ static int rockchip_set_pull(struct rockchip_pin_bank *bank,
}
}
/*
- * In the TRM, pull-up being 1 for everything except the GPIO0_D0-D6,
+ * In the TRM, pull-up being 1 for everything except the GPIO0_D3-D6,
* where that pull up value becomes 3.
*/
if (ctrl->type == RK3568 && bank->bank_num == 0 && pin_num >= 27 && pin_num <= 30) {
diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c
index 99c3745da456..190923757cda 100644
--- a/drivers/pinctrl/pinctrl-single.c
+++ b/drivers/pinctrl/pinctrl-single.c
@@ -372,6 +372,8 @@ static int pcs_set_mux(struct pinctrl_dev *pctldev, unsigned fselector,
if (!pcs->fmask)
return 0;
function = pinmux_generic_get_function(pctldev, fselector);
+ if (!function)
+ return -EINVAL;
func = function->data;
if (!func)
return -EINVAL;
diff --git a/drivers/pinctrl/qcom/pinctrl-sm8450-lpass-lpi.c b/drivers/pinctrl/qcom/pinctrl-sm8450-lpass-lpi.c
index c3c8c34148f1..e22d03ce292e 100644
--- a/drivers/pinctrl/qcom/pinctrl-sm8450-lpass-lpi.c
+++ b/drivers/pinctrl/qcom/pinctrl-sm8450-lpass-lpi.c
@@ -105,7 +105,7 @@ static const struct pinctrl_pin_desc sm8450_lpi_pins[] = {
static const char * const swr_tx_clk_groups[] = { "gpio0" };
static const char * const swr_tx_data_groups[] = { "gpio1", "gpio2", "gpio14" };
static const char * const swr_rx_clk_groups[] = { "gpio3" };
-static const char * const swr_rx_data_groups[] = { "gpio4", "gpio5", "gpio15" };
+static const char * const swr_rx_data_groups[] = { "gpio4", "gpio5" };
static const char * const dmic1_clk_groups[] = { "gpio6" };
static const char * const dmic1_data_groups[] = { "gpio7" };
static const char * const dmic2_clk_groups[] = { "gpio8" };
diff --git a/drivers/pinctrl/sunplus/sppctl.c b/drivers/pinctrl/sunplus/sppctl.c
index 2b3335ab56c6..25101293268f 100644
--- a/drivers/pinctrl/sunplus/sppctl.c
+++ b/drivers/pinctrl/sunplus/sppctl.c
@@ -499,7 +499,6 @@ static int sppctl_gpio_set_config(struct gpio_chip *chip, unsigned int offset,
return 0;
}
-#ifdef CONFIG_DEBUG_FS
static void sppctl_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip)
{
const char *label;
@@ -521,7 +520,6 @@ static void sppctl_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip)
seq_puts(s, "\n");
}
}
-#endif
static int sppctl_gpio_new(struct platform_device *pdev, struct sppctl_pdata *pctl)
{
@@ -550,9 +548,8 @@ static int sppctl_gpio_new(struct platform_device *pdev, struct sppctl_pdata *pc
gchip->get = sppctl_gpio_get;
gchip->set = sppctl_gpio_set;
gchip->set_config = sppctl_gpio_set_config;
-#ifdef CONFIG_DEBUG_FS
- gchip->dbg_show = sppctl_gpio_dbg_show;
-#endif
+ gchip->dbg_show = IS_ENABLED(CONFIG_DEBUG_FS) ?
+ sppctl_gpio_dbg_show : NULL;
gchip->base = -1;
gchip->ngpio = sppctl_gpio_list_sz;
gchip->names = sppctl_gpio_list_s;
diff --git a/drivers/platform/surface/aggregator/controller.c b/drivers/platform/surface/aggregator/controller.c
index 43e765199137..c6537a1b3a2e 100644
--- a/drivers/platform/surface/aggregator/controller.c
+++ b/drivers/platform/surface/aggregator/controller.c
@@ -1700,8 +1700,10 @@ int ssam_request_sync(struct ssam_controller *ctrl,
return status;
status = ssam_request_sync_init(rqst, spec->flags);
- if (status)
+ if (status) {
+ ssam_request_sync_free(rqst);
return status;
+ }
ssam_request_sync_set_resp(rqst, rsp);
diff --git a/drivers/platform/surface/aggregator/ssh_request_layer.c b/drivers/platform/surface/aggregator/ssh_request_layer.c
index f5565570f16c..69132976d297 100644
--- a/drivers/platform/surface/aggregator/ssh_request_layer.c
+++ b/drivers/platform/surface/aggregator/ssh_request_layer.c
@@ -916,6 +916,20 @@ static void ssh_rtl_rx_command(struct ssh_ptl *p, const struct ssam_span *data)
if (sshp_parse_command(dev, data, &command, &command_data))
return;
+ /*
+ * Check if the message was intended for us. If not, drop it.
+ *
+ * Note: We will need to change this to handle debug messages. On newer
+ * generation devices, these seem to be sent to tid_out=0x03. We as
+ * host can still receive them as they can be forwarded via an override
+ * option on SAM, but doing so does not change tid_out=0x00.
+ */
+ if (command->tid_out != 0x00) {
+ rtl_warn(rtl, "rtl: dropping message not intended for us (tid = %#04x)\n",
+ command->tid_out);
+ return;
+ }
+
if (ssh_rqid_is_event(get_unaligned_le16(&command->rqid)))
ssh_rtl_rx_event(rtl, command, &command_data);
else
diff --git a/drivers/platform/x86/amd/Kconfig b/drivers/platform/x86/amd/Kconfig
index a825af8126c8..2ce8cb2170df 100644
--- a/drivers/platform/x86/amd/Kconfig
+++ b/drivers/platform/x86/amd/Kconfig
@@ -8,6 +8,7 @@ source "drivers/platform/x86/amd/pmf/Kconfig"
config AMD_PMC
tristate "AMD SoC PMC driver"
depends on ACPI && PCI && RTC_CLASS
+ select SERIO
help
The driver provides support for AMD Power Management Controller
primarily responsible for S2Idle transactions that are driven from
diff --git a/drivers/platform/x86/amd/pmc.c b/drivers/platform/x86/amd/pmc.c
index 439d282aafd1..3cbb01ec10e3 100644
--- a/drivers/platform/x86/amd/pmc.c
+++ b/drivers/platform/x86/amd/pmc.c
@@ -22,6 +22,7 @@
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/rtc.h>
+#include <linux/serio.h>
#include <linux/suspend.h>
#include <linux/seq_file.h>
#include <linux/uaccess.h>
@@ -160,6 +161,10 @@ static bool enable_stb;
module_param(enable_stb, bool, 0644);
MODULE_PARM_DESC(enable_stb, "Enable the STB debug mechanism");
+static bool disable_workarounds;
+module_param(disable_workarounds, bool, 0644);
+MODULE_PARM_DESC(disable_workarounds, "Disable workarounds for platform bugs");
+
static struct amd_pmc_dev pmc;
static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, u32 arg, u32 *data, u8 msg, bool ret);
static int amd_pmc_read_stb(struct amd_pmc_dev *dev, u32 *buf);
@@ -653,6 +658,33 @@ static int amd_pmc_get_os_hint(struct amd_pmc_dev *dev)
return -EINVAL;
}
+static int amd_pmc_czn_wa_irq1(struct amd_pmc_dev *pdev)
+{
+ struct device *d;
+ int rc;
+
+ if (!pdev->major) {
+ rc = amd_pmc_get_smu_version(pdev);
+ if (rc)
+ return rc;
+ }
+
+ if (pdev->major > 64 || (pdev->major == 64 && pdev->minor > 65))
+ return 0;
+
+ d = bus_find_device_by_name(&serio_bus, NULL, "serio0");
+ if (!d)
+ return 0;
+ if (device_may_wakeup(d)) {
+ dev_info_once(d, "Disabling IRQ1 wakeup source to avoid platform firmware bug\n");
+ disable_irq_wake(1);
+ device_set_wakeup_enable(d, false);
+ }
+ put_device(d);
+
+ return 0;
+}
+
static int amd_pmc_verify_czn_rtc(struct amd_pmc_dev *pdev, u32 *arg)
{
struct rtc_device *rtc_device;
@@ -715,8 +747,8 @@ static void amd_pmc_s2idle_prepare(void)
/* Reset and Start SMU logging - to monitor the s0i3 stats */
amd_pmc_setup_smu_logging(pdev);
- /* Activate CZN specific RTC functionality */
- if (pdev->cpu_id == AMD_CPU_ID_CZN) {
+ /* Activate CZN specific platform bug workarounds */
+ if (pdev->cpu_id == AMD_CPU_ID_CZN && !disable_workarounds) {
rc = amd_pmc_verify_czn_rtc(pdev, &arg);
if (rc) {
dev_err(pdev->dev, "failed to set RTC: %d\n", rc);
@@ -782,6 +814,25 @@ static struct acpi_s2idle_dev_ops amd_pmc_s2idle_dev_ops = {
.check = amd_pmc_s2idle_check,
.restore = amd_pmc_s2idle_restore,
};
+
+static int __maybe_unused amd_pmc_suspend_handler(struct device *dev)
+{
+ struct amd_pmc_dev *pdev = dev_get_drvdata(dev);
+
+ if (pdev->cpu_id == AMD_CPU_ID_CZN && !disable_workarounds) {
+ int rc = amd_pmc_czn_wa_irq1(pdev);
+
+ if (rc) {
+ dev_err(pdev->dev, "failed to adjust keyboard wakeup: %d\n", rc);
+ return rc;
+ }
+ }
+
+ return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(amd_pmc_pm, amd_pmc_suspend_handler, NULL);
+
#endif
static const struct pci_device_id pmc_pci_ids[] = {
@@ -932,7 +983,7 @@ static int amd_pmc_probe(struct platform_device *pdev)
if (enable_stb && (dev->cpu_id == AMD_CPU_ID_YC || dev->cpu_id == AMD_CPU_ID_CB)) {
err = amd_pmc_s2d_init(dev);
if (err)
- return err;
+ goto err_pci_dev_put;
}
platform_set_drvdata(pdev, dev);
@@ -980,6 +1031,9 @@ static struct platform_driver amd_pmc_driver = {
.name = "amd_pmc",
.acpi_match_table = amd_pmc_acpi_ids,
.dev_groups = pmc_groups,
+#ifdef CONFIG_SUSPEND
+ .pm = &amd_pmc_pm,
+#endif
},
.probe = amd_pmc_probe,
.remove = amd_pmc_remove,
diff --git a/drivers/platform/x86/amd/pmf/auto-mode.c b/drivers/platform/x86/amd/pmf/auto-mode.c
index 644af42e07cf..96a8e1832c05 100644
--- a/drivers/platform/x86/amd/pmf/auto-mode.c
+++ b/drivers/platform/x86/amd/pmf/auto-mode.c
@@ -275,13 +275,8 @@ int amd_pmf_reset_amt(struct amd_pmf_dev *dev)
*/
if (is_apmf_func_supported(dev, APMF_FUNC_STATIC_SLIDER_GRANULAR)) {
- int mode = amd_pmf_get_pprof_modes(dev);
-
- if (mode < 0)
- return mode;
-
dev_dbg(dev->dev, "resetting AMT thermals\n");
- amd_pmf_update_slider(dev, SLIDER_OP_SET, mode, NULL);
+ amd_pmf_set_sps_power_limits(dev);
}
return 0;
}
@@ -299,7 +294,5 @@ void amd_pmf_deinit_auto_mode(struct amd_pmf_dev *dev)
void amd_pmf_init_auto_mode(struct amd_pmf_dev *dev)
{
amd_pmf_load_defaults_auto_mode(dev);
- /* update the thermal limits for Automode */
- amd_pmf_set_automode(dev, config_store.current_mode, NULL);
amd_pmf_init_metrics_table(dev);
}
diff --git a/drivers/platform/x86/amd/pmf/cnqf.c b/drivers/platform/x86/amd/pmf/cnqf.c
index 3f9731a2ac28..4beb22a19466 100644
--- a/drivers/platform/x86/amd/pmf/cnqf.c
+++ b/drivers/platform/x86/amd/pmf/cnqf.c
@@ -103,7 +103,7 @@ int amd_pmf_trans_cnqf(struct amd_pmf_dev *dev, int socket_power, ktime_t time_l
src = amd_pmf_cnqf_get_power_source(dev);
- if (dev->current_profile == PLATFORM_PROFILE_BALANCED) {
+ if (is_pprof_balanced(dev)) {
amd_pmf_set_cnqf(dev, src, config_store.current_mode, NULL);
} else {
/*
@@ -307,13 +307,9 @@ static ssize_t cnqf_enable_store(struct device *dev,
const char *buf, size_t count)
{
struct amd_pmf_dev *pdev = dev_get_drvdata(dev);
- int mode, result, src;
+ int result, src;
bool input;
- mode = amd_pmf_get_pprof_modes(pdev);
- if (mode < 0)
- return mode;
-
result = kstrtobool(buf, &input);
if (result)
return result;
@@ -321,11 +317,11 @@ static ssize_t cnqf_enable_store(struct device *dev,
src = amd_pmf_cnqf_get_power_source(pdev);
pdev->cnqf_enabled = input;
- if (pdev->cnqf_enabled && pdev->current_profile == PLATFORM_PROFILE_BALANCED) {
+ if (pdev->cnqf_enabled && is_pprof_balanced(pdev)) {
amd_pmf_set_cnqf(pdev, src, config_store.current_mode, NULL);
} else {
if (is_apmf_func_supported(pdev, APMF_FUNC_STATIC_SLIDER_GRANULAR))
- amd_pmf_update_slider(pdev, SLIDER_OP_SET, mode, NULL);
+ amd_pmf_set_sps_power_limits(pdev);
}
dev_dbg(pdev->dev, "Received CnQF %s\n", input ? "on" : "off");
@@ -386,7 +382,7 @@ int amd_pmf_init_cnqf(struct amd_pmf_dev *dev)
dev->cnqf_enabled = amd_pmf_check_flags(dev);
/* update the thermal for CnQF */
- if (dev->cnqf_enabled && dev->current_profile == PLATFORM_PROFILE_BALANCED) {
+ if (dev->cnqf_enabled && is_pprof_balanced(dev)) {
src = amd_pmf_cnqf_get_power_source(dev);
amd_pmf_set_cnqf(dev, src, config_store.current_mode, NULL);
}
diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c
index a5f5a4bcff6d..da23639071d7 100644
--- a/drivers/platform/x86/amd/pmf/core.c
+++ b/drivers/platform/x86/amd/pmf/core.c
@@ -58,6 +58,25 @@ static bool force_load;
module_param(force_load, bool, 0444);
MODULE_PARM_DESC(force_load, "Force load this driver on supported older platforms (experimental)");
+static int amd_pmf_pwr_src_notify_call(struct notifier_block *nb, unsigned long event, void *data)
+{
+ struct amd_pmf_dev *pmf = container_of(nb, struct amd_pmf_dev, pwr_src_notifier);
+
+ if (event != PSY_EVENT_PROP_CHANGED)
+ return NOTIFY_OK;
+
+ if (is_apmf_func_supported(pmf, APMF_FUNC_AUTO_MODE) ||
+ is_apmf_func_supported(pmf, APMF_FUNC_DYN_SLIDER_DC) ||
+ is_apmf_func_supported(pmf, APMF_FUNC_DYN_SLIDER_AC)) {
+ if ((pmf->amt_enabled || pmf->cnqf_enabled) && is_pprof_balanced(pmf))
+ return NOTIFY_DONE;
+ }
+
+ amd_pmf_set_sps_power_limits(pmf);
+
+ return NOTIFY_OK;
+}
+
static int current_power_limits_show(struct seq_file *seq, void *unused)
{
struct amd_pmf_dev *dev = seq->private;
@@ -366,14 +385,18 @@ static int amd_pmf_probe(struct platform_device *pdev)
if (!dev->regbase)
return -ENOMEM;
+ mutex_init(&dev->lock);
+ mutex_init(&dev->update_mutex);
+
apmf_acpi_init(dev);
platform_set_drvdata(pdev, dev);
amd_pmf_init_features(dev);
apmf_install_handler(dev);
amd_pmf_dbgfs_register(dev);
- mutex_init(&dev->lock);
- mutex_init(&dev->update_mutex);
+ dev->pwr_src_notifier.notifier_call = amd_pmf_pwr_src_notify_call;
+ power_supply_reg_notifier(&dev->pwr_src_notifier);
+
dev_info(dev->dev, "registered PMF device successfully\n");
return 0;
@@ -383,11 +406,12 @@ static int amd_pmf_remove(struct platform_device *pdev)
{
struct amd_pmf_dev *dev = platform_get_drvdata(pdev);
- mutex_destroy(&dev->lock);
- mutex_destroy(&dev->update_mutex);
+ power_supply_unreg_notifier(&dev->pwr_src_notifier);
amd_pmf_deinit_features(dev);
apmf_acpi_deinit(dev);
amd_pmf_dbgfs_unregister(dev);
+ mutex_destroy(&dev->lock);
+ mutex_destroy(&dev->update_mutex);
kfree(dev->buf);
return 0;
}
diff --git a/drivers/platform/x86/amd/pmf/pmf.h b/drivers/platform/x86/amd/pmf/pmf.h
index 84bbe2c6ea61..06c30cdc0573 100644
--- a/drivers/platform/x86/amd/pmf/pmf.h
+++ b/drivers/platform/x86/amd/pmf/pmf.h
@@ -169,6 +169,7 @@ struct amd_pmf_dev {
struct mutex update_mutex; /* protects race between ACPI handler and metrics thread */
bool cnqf_enabled;
bool cnqf_supported;
+ struct notifier_block pwr_src_notifier;
};
struct apmf_sps_prop_granular {
@@ -391,9 +392,11 @@ int amd_pmf_init_sps(struct amd_pmf_dev *dev);
void amd_pmf_deinit_sps(struct amd_pmf_dev *dev);
int apmf_get_static_slider_granular(struct amd_pmf_dev *pdev,
struct apmf_static_slider_granular_output *output);
+bool is_pprof_balanced(struct amd_pmf_dev *pmf);
int apmf_update_fan_idx(struct amd_pmf_dev *pdev, bool manual, u32 idx);
+int amd_pmf_set_sps_power_limits(struct amd_pmf_dev *pmf);
/* Auto Mode Layer */
int apmf_get_auto_mode_def(struct amd_pmf_dev *pdev, struct apmf_auto_mode *data);
diff --git a/drivers/platform/x86/amd/pmf/sps.c b/drivers/platform/x86/amd/pmf/sps.c
index dba7e36962dc..bed762d47a14 100644
--- a/drivers/platform/x86/amd/pmf/sps.c
+++ b/drivers/platform/x86/amd/pmf/sps.c
@@ -70,6 +70,24 @@ void amd_pmf_update_slider(struct amd_pmf_dev *dev, bool op, int idx,
}
}
+int amd_pmf_set_sps_power_limits(struct amd_pmf_dev *pmf)
+{
+ int mode;
+
+ mode = amd_pmf_get_pprof_modes(pmf);
+ if (mode < 0)
+ return mode;
+
+ amd_pmf_update_slider(pmf, SLIDER_OP_SET, mode, NULL);
+
+ return 0;
+}
+
+bool is_pprof_balanced(struct amd_pmf_dev *pmf)
+{
+ return (pmf->current_profile == PLATFORM_PROFILE_BALANCED) ? true : false;
+}
+
static int amd_pmf_profile_get(struct platform_profile_handler *pprof,
enum platform_profile_option *profile)
{
@@ -105,15 +123,10 @@ static int amd_pmf_profile_set(struct platform_profile_handler *pprof,
enum platform_profile_option profile)
{
struct amd_pmf_dev *pmf = container_of(pprof, struct amd_pmf_dev, pprof);
- int mode;
pmf->current_profile = profile;
- mode = amd_pmf_get_pprof_modes(pmf);
- if (mode < 0)
- return mode;
- amd_pmf_update_slider(pmf, SLIDER_OP_SET, mode, NULL);
- return 0;
+ return amd_pmf_set_sps_power_limits(pmf);
}
int amd_pmf_init_sps(struct amd_pmf_dev *dev)
@@ -123,6 +136,9 @@ int amd_pmf_init_sps(struct amd_pmf_dev *dev)
dev->current_profile = PLATFORM_PROFILE_BALANCED;
amd_pmf_load_defaults_sps(dev);
+ /* update SPS balanced power mode thermals */
+ amd_pmf_set_sps_power_limits(dev);
+
dev->pprof.profile_get = amd_pmf_profile_get;
dev->pprof.profile_set = amd_pmf_profile_set;
diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c
index ca33df7ea550..9333f82cfa8a 100644
--- a/drivers/platform/x86/apple-gmux.c
+++ b/drivers/platform/x86/apple-gmux.c
@@ -64,29 +64,6 @@ struct apple_gmux_data {
static struct apple_gmux_data *apple_gmux_data;
-/*
- * gmux port offsets. Many of these are not yet used, but may be in the
- * future, and it's useful to have them documented here anyhow.
- */
-#define GMUX_PORT_VERSION_MAJOR 0x04
-#define GMUX_PORT_VERSION_MINOR 0x05
-#define GMUX_PORT_VERSION_RELEASE 0x06
-#define GMUX_PORT_SWITCH_DISPLAY 0x10
-#define GMUX_PORT_SWITCH_GET_DISPLAY 0x11
-#define GMUX_PORT_INTERRUPT_ENABLE 0x14
-#define GMUX_PORT_INTERRUPT_STATUS 0x16
-#define GMUX_PORT_SWITCH_DDC 0x28
-#define GMUX_PORT_SWITCH_EXTERNAL 0x40
-#define GMUX_PORT_SWITCH_GET_EXTERNAL 0x41
-#define GMUX_PORT_DISCRETE_POWER 0x50
-#define GMUX_PORT_MAX_BRIGHTNESS 0x70
-#define GMUX_PORT_BRIGHTNESS 0x74
-#define GMUX_PORT_VALUE 0xc2
-#define GMUX_PORT_READ 0xd0
-#define GMUX_PORT_WRITE 0xd4
-
-#define GMUX_MIN_IO_LEN (GMUX_PORT_BRIGHTNESS + 4)
-
#define GMUX_INTERRUPT_ENABLE 0xff
#define GMUX_INTERRUPT_DISABLE 0x00
@@ -249,23 +226,6 @@ static void gmux_write32(struct apple_gmux_data *gmux_data, int port,
gmux_pio_write32(gmux_data, port, val);
}
-static bool gmux_is_indexed(struct apple_gmux_data *gmux_data)
-{
- u16 val;
-
- outb(0xaa, gmux_data->iostart + 0xcc);
- outb(0x55, gmux_data->iostart + 0xcd);
- outb(0x00, gmux_data->iostart + 0xce);
-
- val = inb(gmux_data->iostart + 0xcc) |
- (inb(gmux_data->iostart + 0xcd) << 8);
-
- if (val == 0x55aa)
- return true;
-
- return false;
-}
-
/**
* DOC: Backlight control
*
@@ -605,60 +565,43 @@ static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id)
int ret = -ENXIO;
acpi_status status;
unsigned long long gpe;
+ bool indexed = false;
+ u32 version;
if (apple_gmux_data)
return -EBUSY;
+ if (!apple_gmux_detect(pnp, &indexed)) {
+ pr_info("gmux device not present\n");
+ return -ENODEV;
+ }
+
gmux_data = kzalloc(sizeof(*gmux_data), GFP_KERNEL);
if (!gmux_data)
return -ENOMEM;
pnp_set_drvdata(pnp, gmux_data);
res = pnp_get_resource(pnp, IORESOURCE_IO, 0);
- if (!res) {
- pr_err("Failed to find gmux I/O resource\n");
- goto err_free;
- }
-
gmux_data->iostart = res->start;
gmux_data->iolen = resource_size(res);
- if (gmux_data->iolen < GMUX_MIN_IO_LEN) {
- pr_err("gmux I/O region too small (%lu < %u)\n",
- gmux_data->iolen, GMUX_MIN_IO_LEN);
- goto err_free;
- }
-
if (!request_region(gmux_data->iostart, gmux_data->iolen,
"Apple gmux")) {
pr_err("gmux I/O already in use\n");
goto err_free;
}
- /*
- * Invalid version information may indicate either that the gmux
- * device isn't present or that it's a new one that uses indexed
- * io
- */
-
- ver_major = gmux_read8(gmux_data, GMUX_PORT_VERSION_MAJOR);
- ver_minor = gmux_read8(gmux_data, GMUX_PORT_VERSION_MINOR);
- ver_release = gmux_read8(gmux_data, GMUX_PORT_VERSION_RELEASE);
- if (ver_major == 0xff && ver_minor == 0xff && ver_release == 0xff) {
- if (gmux_is_indexed(gmux_data)) {
- u32 version;
- mutex_init(&gmux_data->index_lock);
- gmux_data->indexed = true;
- version = gmux_read32(gmux_data,
- GMUX_PORT_VERSION_MAJOR);
- ver_major = (version >> 24) & 0xff;
- ver_minor = (version >> 16) & 0xff;
- ver_release = (version >> 8) & 0xff;
- } else {
- pr_info("gmux device not present\n");
- ret = -ENODEV;
- goto err_release;
- }
+ if (indexed) {
+ mutex_init(&gmux_data->index_lock);
+ gmux_data->indexed = true;
+ version = gmux_read32(gmux_data, GMUX_PORT_VERSION_MAJOR);
+ ver_major = (version >> 24) & 0xff;
+ ver_minor = (version >> 16) & 0xff;
+ ver_release = (version >> 8) & 0xff;
+ } else {
+ ver_major = gmux_read8(gmux_data, GMUX_PORT_VERSION_MAJOR);
+ ver_minor = gmux_read8(gmux_data, GMUX_PORT_VERSION_MINOR);
+ ver_release = gmux_read8(gmux_data, GMUX_PORT_VERSION_RELEASE);
}
pr_info("Found gmux version %d.%d.%d [%s]\n", ver_major, ver_minor,
ver_release, (gmux_data->indexed ? "indexed" : "classic"));
diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c
index c685a705b73d..cb15acdf14a3 100644
--- a/drivers/platform/x86/asus-nb-wmi.c
+++ b/drivers/platform/x86/asus-nb-wmi.c
@@ -121,6 +121,10 @@ static struct quirk_entry quirk_asus_tablet_mode = {
.tablet_switch_mode = asus_wmi_lid_flip_rog_devid,
};
+static struct quirk_entry quirk_asus_ignore_fan = {
+ .wmi_ignore_fan = true,
+};
+
static int dmi_matched(const struct dmi_system_id *dmi)
{
pr_info("Identified laptop model '%s'\n", dmi->ident);
@@ -473,6 +477,15 @@ static const struct dmi_system_id asus_quirks[] = {
},
.driver_data = &quirk_asus_tablet_mode,
},
+ {
+ .callback = dmi_matched,
+ .ident = "ASUS VivoBook E410MA",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "E410MA"),
+ },
+ .driver_data = &quirk_asus_ignore_fan,
+ },
{},
};
@@ -511,6 +524,7 @@ static const struct key_entry asus_nb_wmi_keymap[] = {
{ KE_KEY, 0x30, { KEY_VOLUMEUP } },
{ KE_KEY, 0x31, { KEY_VOLUMEDOWN } },
{ KE_KEY, 0x32, { KEY_MUTE } },
+ { KE_KEY, 0x33, { KEY_SCREENLOCK } },
{ KE_KEY, 0x35, { KEY_SCREENLOCK } },
{ KE_KEY, 0x38, { KEY_PROG3 } }, /* Armoury Crate */
{ KE_KEY, 0x40, { KEY_PREVIOUSSONG } },
@@ -544,6 +558,7 @@ static const struct key_entry asus_nb_wmi_keymap[] = {
{ KE_KEY, 0x7D, { KEY_BLUETOOTH } }, /* Bluetooth Enable */
{ KE_KEY, 0x7E, { KEY_BLUETOOTH } }, /* Bluetooth Disable */
{ KE_KEY, 0x82, { KEY_CAMERA } },
+ { KE_KEY, 0x85, { KEY_CAMERA } },
{ KE_KEY, 0x86, { KEY_PROG1 } }, /* MyASUS Key */
{ KE_KEY, 0x88, { KEY_RFKILL } }, /* Radio Toggle Key */
{ KE_KEY, 0x8A, { KEY_PROG1 } }, /* Color enhancement mode */
diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index 6f81b2844dcb..1038dfdcdd32 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -225,6 +225,7 @@ struct asus_wmi {
int tablet_switch_event_code;
u32 tablet_switch_dev_id;
+ bool tablet_switch_inverted;
enum fan_type fan_type;
enum fan_type gpu_fan_type;
@@ -493,6 +494,13 @@ static bool asus_wmi_dev_is_present(struct asus_wmi *asus, u32 dev_id)
}
/* Input **********************************************************************/
+static void asus_wmi_tablet_sw_report(struct asus_wmi *asus, bool value)
+{
+ input_report_switch(asus->inputdev, SW_TABLET_MODE,
+ asus->tablet_switch_inverted ? !value : value);
+ input_sync(asus->inputdev);
+}
+
static void asus_wmi_tablet_sw_init(struct asus_wmi *asus, u32 dev_id, int event_code)
{
struct device *dev = &asus->platform_device->dev;
@@ -501,7 +509,7 @@ static void asus_wmi_tablet_sw_init(struct asus_wmi *asus, u32 dev_id, int event
result = asus_wmi_get_devstate_simple(asus, dev_id);
if (result >= 0) {
input_set_capability(asus->inputdev, EV_SW, SW_TABLET_MODE);
- input_report_switch(asus->inputdev, SW_TABLET_MODE, result);
+ asus_wmi_tablet_sw_report(asus, result);
asus->tablet_switch_dev_id = dev_id;
asus->tablet_switch_event_code = event_code;
} else if (result == -ENODEV) {
@@ -534,6 +542,7 @@ static int asus_wmi_input_init(struct asus_wmi *asus)
case asus_wmi_no_tablet_switch:
break;
case asus_wmi_kbd_dock_devid:
+ asus->tablet_switch_inverted = true;
asus_wmi_tablet_sw_init(asus, ASUS_WMI_DEVID_KBD_DOCK, NOTIFY_KBD_DOCK_CHANGE);
break;
case asus_wmi_lid_flip_devid:
@@ -573,10 +582,8 @@ static void asus_wmi_tablet_mode_get_state(struct asus_wmi *asus)
return;
result = asus_wmi_get_devstate_simple(asus, asus->tablet_switch_dev_id);
- if (result >= 0) {
- input_report_switch(asus->inputdev, SW_TABLET_MODE, result);
- input_sync(asus->inputdev);
- }
+ if (result >= 0)
+ asus_wmi_tablet_sw_report(asus, result);
}
/* dGPU ********************************************************************/
@@ -2243,7 +2250,9 @@ static int asus_wmi_fan_init(struct asus_wmi *asus)
asus->fan_type = FAN_TYPE_NONE;
asus->agfn_pwm = -1;
- if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_CPU_FAN_CTRL))
+ if (asus->driver->quirks->wmi_ignore_fan)
+ asus->fan_type = FAN_TYPE_NONE;
+ else if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_CPU_FAN_CTRL))
asus->fan_type = FAN_TYPE_SPEC83;
else if (asus_wmi_has_agfn_fan(asus))
asus->fan_type = FAN_TYPE_AGFN;
@@ -2436,6 +2445,9 @@ static int fan_curve_check_present(struct asus_wmi *asus, bool *available,
*available = false;
+ if (asus->fan_type == FAN_TYPE_NONE)
+ return 0;
+
err = fan_curve_get_factory_default(asus, fan_dev);
if (err) {
return 0;
diff --git a/drivers/platform/x86/asus-wmi.h b/drivers/platform/x86/asus-wmi.h
index 65316998b898..a478ebfd34df 100644
--- a/drivers/platform/x86/asus-wmi.h
+++ b/drivers/platform/x86/asus-wmi.h
@@ -38,6 +38,7 @@ struct quirk_entry {
bool store_backlight_power;
bool wmi_backlight_set_devstate;
bool wmi_force_als_set;
+ bool wmi_ignore_fan;
enum asus_wmi_tablet_switch_mode tablet_switch_mode;
int wapf;
/*
diff --git a/drivers/platform/x86/dell/dell-wmi-base.c b/drivers/platform/x86/dell/dell-wmi-base.c
index 0a259a27459f..502783a7adb1 100644
--- a/drivers/platform/x86/dell/dell-wmi-base.c
+++ b/drivers/platform/x86/dell/dell-wmi-base.c
@@ -261,6 +261,9 @@ static const struct key_entry dell_wmi_keymap_type_0010[] = {
{ KE_KEY, 0x57, { KEY_BRIGHTNESSDOWN } },
{ KE_KEY, 0x58, { KEY_BRIGHTNESSUP } },
+ /*Speaker Mute*/
+ { KE_KEY, 0x109, { KEY_MUTE} },
+
/* Mic mute */
{ KE_KEY, 0x150, { KEY_MICMUTE } },
diff --git a/drivers/platform/x86/dell/dell-wmi-privacy.c b/drivers/platform/x86/dell/dell-wmi-privacy.c
index c82b3d6867c5..c517bd45dd32 100644
--- a/drivers/platform/x86/dell/dell-wmi-privacy.c
+++ b/drivers/platform/x86/dell/dell-wmi-privacy.c
@@ -61,7 +61,7 @@ static const struct key_entry dell_wmi_keymap_type_0012[] = {
/* privacy mic mute */
{ KE_KEY, 0x0001, { KEY_MICMUTE } },
/* privacy camera mute */
- { KE_SW, 0x0002, { SW_CAMERA_LENS_COVER } },
+ { KE_VSW, 0x0002, { SW_CAMERA_LENS_COVER } },
{ KE_END, 0},
};
@@ -115,11 +115,15 @@ bool dell_privacy_process_event(int type, int code, int status)
switch (code) {
case DELL_PRIVACY_AUDIO_EVENT: /* Mic mute */
- case DELL_PRIVACY_CAMERA_EVENT: /* Camera mute */
priv->last_status = status;
sparse_keymap_report_entry(priv->input_dev, key, 1, true);
ret = true;
break;
+ case DELL_PRIVACY_CAMERA_EVENT: /* Camera mute */
+ priv->last_status = status;
+ sparse_keymap_report_entry(priv->input_dev, key, !(status & CAMERA_STATUS), false);
+ ret = true;
+ break;
default:
dev_dbg(&priv->wdev->dev, "unknown event type 0x%04x 0x%04x\n", type, code);
}
@@ -292,7 +296,7 @@ static int dell_privacy_wmi_probe(struct wmi_device *wdev, const void *context)
{
struct privacy_wmi_data *priv;
struct key_entry *keymap;
- int ret, i;
+ int ret, i, j;
ret = wmi_has_guid(DELL_PRIVACY_GUID);
if (!ret)
@@ -304,6 +308,11 @@ static int dell_privacy_wmi_probe(struct wmi_device *wdev, const void *context)
dev_set_drvdata(&wdev->dev, priv);
priv->wdev = wdev;
+
+ ret = get_current_status(priv->wdev);
+ if (ret)
+ return ret;
+
/* create evdev passing interface */
priv->input_dev = devm_input_allocate_device(&wdev->dev);
if (!priv->input_dev)
@@ -318,9 +327,20 @@ static int dell_privacy_wmi_probe(struct wmi_device *wdev, const void *context)
/* remap the keymap code with Dell privacy key type 0x12 as prefix
* KEY_MICMUTE scancode will be reported as 0x120001
*/
- for (i = 0; i < ARRAY_SIZE(dell_wmi_keymap_type_0012); i++) {
- keymap[i] = dell_wmi_keymap_type_0012[i];
- keymap[i].code |= (0x0012 << 16);
+ for (i = 0, j = 0; i < ARRAY_SIZE(dell_wmi_keymap_type_0012); i++) {
+ /*
+ * Unlike keys where only presses matter, userspace may act
+ * on switches in both of their positions. Only register
+ * SW_CAMERA_LENS_COVER if it is actually there.
+ */
+ if (dell_wmi_keymap_type_0012[i].type == KE_VSW &&
+ dell_wmi_keymap_type_0012[i].sw.code == SW_CAMERA_LENS_COVER &&
+ !(priv->features_present & BIT(DELL_PRIVACY_TYPE_CAMERA)))
+ continue;
+
+ keymap[j] = dell_wmi_keymap_type_0012[i];
+ keymap[j].code |= (0x0012 << 16);
+ j++;
}
ret = sparse_keymap_setup(priv->input_dev, keymap, NULL);
kfree(keymap);
@@ -331,11 +351,12 @@ static int dell_privacy_wmi_probe(struct wmi_device *wdev, const void *context)
priv->input_dev->name = "Dell Privacy Driver";
priv->input_dev->id.bustype = BUS_HOST;
- ret = input_register_device(priv->input_dev);
- if (ret)
- return ret;
+ /* Report initial camera-cover status */
+ if (priv->features_present & BIT(DELL_PRIVACY_TYPE_CAMERA))
+ input_report_switch(priv->input_dev, SW_CAMERA_LENS_COVER,
+ !(priv->last_status & CAMERA_STATUS));
- ret = get_current_status(priv->wdev);
+ ret = input_register_device(priv->input_dev);
if (ret)
return ret;
diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c
index 5e7e6659a849..322cfaeda17b 100644
--- a/drivers/platform/x86/gigabyte-wmi.c
+++ b/drivers/platform/x86/gigabyte-wmi.c
@@ -141,6 +141,7 @@ static u8 gigabyte_wmi_detect_sensor_usability(struct wmi_device *wdev)
static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = {
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B450M DS3H-CF"),
+ DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B450M DS3H WIFI-CF"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B450M S2H V2"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE AX V2"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE"),
diff --git a/drivers/platform/x86/hp/hp-wmi.c b/drivers/platform/x86/hp/hp-wmi.c
index 0a99058be813..2ef201b625b3 100644
--- a/drivers/platform/x86/hp/hp-wmi.c
+++ b/drivers/platform/x86/hp/hp-wmi.c
@@ -90,6 +90,7 @@ enum hp_wmi_event_ids {
HPWMI_PEAKSHIFT_PERIOD = 0x0F,
HPWMI_BATTERY_CHARGE_PERIOD = 0x10,
HPWMI_SANITIZATION_MODE = 0x17,
+ HPWMI_OMEN_KEY = 0x1D,
HPWMI_SMART_EXPERIENCE_APP = 0x21,
};
@@ -216,6 +217,8 @@ static const struct key_entry hp_wmi_keymap[] = {
{ KE_KEY, 0x213b, { KEY_INFO } },
{ KE_KEY, 0x2169, { KEY_ROTATE_DISPLAY } },
{ KE_KEY, 0x216a, { KEY_SETUP } },
+ { KE_KEY, 0x21a5, { KEY_PROG2 } }, /* HP Omen Key */
+ { KE_KEY, 0x21a7, { KEY_FN_ESC } },
{ KE_KEY, 0x21a9, { KEY_TOUCHPAD_OFF } },
{ KE_KEY, 0x121a9, { KEY_TOUCHPAD_ON } },
{ KE_KEY, 0x231b, { KEY_HELP } },
@@ -548,7 +551,7 @@ static int __init hp_wmi_enable_hotkeys(void)
static int hp_wmi_set_block(void *data, bool blocked)
{
- enum hp_wmi_radio r = (enum hp_wmi_radio) data;
+ enum hp_wmi_radio r = (long)data;
int query = BIT(r + 8) | ((!blocked) << r);
int ret;
@@ -810,6 +813,7 @@ static void hp_wmi_notify(u32 value, void *context)
case HPWMI_SMART_ADAPTER:
break;
case HPWMI_BEZEL_BUTTON:
+ case HPWMI_OMEN_KEY:
key_code = hp_wmi_read_int(HPWMI_HOTKEY_QUERY);
if (key_code < 0)
break;
diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index 435d2d3d903b..0eb5bfdd823a 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -1621,6 +1621,12 @@ static const struct dmi_system_id set_fn_lock_led_list[] = {
DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo Legion R7000P2020H"),
}
},
+ {
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo Legion 5 15ARH05"),
+ }
+ },
{}
};
diff --git a/drivers/platform/x86/intel/int3472/clk_and_regulator.c b/drivers/platform/x86/intel/int3472/clk_and_regulator.c
index b2342b3d78c7..74dc2cff799e 100644
--- a/drivers/platform/x86/intel/int3472/clk_and_regulator.c
+++ b/drivers/platform/x86/intel/int3472/clk_and_regulator.c
@@ -181,6 +181,9 @@ int skl_int3472_register_regulator(struct int3472_discrete_device *int3472,
return PTR_ERR(int3472->regulator.gpio);
}
+ /* Ensure the pin is in output mode and non-active state */
+ gpiod_direction_output(int3472->regulator.gpio, 0);
+
cfg.dev = &int3472->adev->dev;
cfg.init_data = &init_data;
cfg.ena_gpiod = int3472->regulator.gpio;
diff --git a/drivers/platform/x86/intel/int3472/discrete.c b/drivers/platform/x86/intel/int3472/discrete.c
index 974a132db651..c42c3faa2c32 100644
--- a/drivers/platform/x86/intel/int3472/discrete.c
+++ b/drivers/platform/x86/intel/int3472/discrete.c
@@ -168,6 +168,8 @@ static int skl_int3472_map_gpio_to_clk(struct int3472_discrete_device *int3472,
return (PTR_ERR(gpio));
int3472->clock.ena_gpio = gpio;
+ /* Ensure the pin is in output mode and non-active state */
+ gpiod_direction_output(int3472->clock.ena_gpio, 0);
break;
case INT3472_GPIO_TYPE_PRIVACY_LED:
gpio = acpi_get_and_request_gpiod(path, pin, "int3472,privacy-led");
@@ -175,6 +177,8 @@ static int skl_int3472_map_gpio_to_clk(struct int3472_discrete_device *int3472,
return (PTR_ERR(gpio));
int3472->clock.led_gpio = gpio;
+ /* Ensure the pin is in output mode and non-active state */
+ gpiod_direction_output(int3472->clock.led_gpio, 0);
break;
default:
dev_err(int3472->dev, "Invalid GPIO type 0x%02x for clock\n", type);
diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c
index f1d802f6ec3f..3a15d32d7644 100644
--- a/drivers/platform/x86/intel/pmc/core.c
+++ b/drivers/platform/x86/intel/pmc/core.c
@@ -1029,6 +1029,7 @@ static const struct x86_cpu_id intel_pmc_core_ids[] = {
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, adl_core_init),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, adl_core_init),
X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, mtl_core_init),
+ X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, mtl_core_init),
{}
};
diff --git a/drivers/platform/x86/intel/vsec.c b/drivers/platform/x86/intel/vsec.c
index bb81b8b1f7e9..89c5374e33b3 100644
--- a/drivers/platform/x86/intel/vsec.c
+++ b/drivers/platform/x86/intel/vsec.c
@@ -408,14 +408,23 @@ static const struct intel_vsec_platform_info dg1_info = {
.quirks = VSEC_QUIRK_NO_DVSEC | VSEC_QUIRK_EARLY_HW,
};
+/* MTL info */
+static const struct intel_vsec_platform_info mtl_info = {
+ .quirks = VSEC_QUIRK_NO_WATCHER | VSEC_QUIRK_NO_CRASHLOG,
+};
+
#define PCI_DEVICE_ID_INTEL_VSEC_ADL 0x467d
#define PCI_DEVICE_ID_INTEL_VSEC_DG1 0x490e
+#define PCI_DEVICE_ID_INTEL_VSEC_MTL_M 0x7d0d
+#define PCI_DEVICE_ID_INTEL_VSEC_MTL_S 0xad0d
#define PCI_DEVICE_ID_INTEL_VSEC_OOBMSM 0x09a7
#define PCI_DEVICE_ID_INTEL_VSEC_RPL 0xa77d
#define PCI_DEVICE_ID_INTEL_VSEC_TGL 0x9a0d
static const struct pci_device_id intel_vsec_pci_ids[] = {
{ PCI_DEVICE_DATA(INTEL, VSEC_ADL, &tgl_info) },
{ PCI_DEVICE_DATA(INTEL, VSEC_DG1, &dg1_info) },
+ { PCI_DEVICE_DATA(INTEL, VSEC_MTL_M, &mtl_info) },
+ { PCI_DEVICE_DATA(INTEL, VSEC_MTL_S, &mtl_info) },
{ PCI_DEVICE_DATA(INTEL, VSEC_OOBMSM, &(struct intel_vsec_platform_info) {}) },
{ PCI_DEVICE_DATA(INTEL, VSEC_RPL, &tgl_info) },
{ PCI_DEVICE_DATA(INTEL, VSEC_TGL, &tgl_info) },
diff --git a/drivers/platform/x86/simatic-ipc.c b/drivers/platform/x86/simatic-ipc.c
index ca76076fc706..b3622419cd1a 100644
--- a/drivers/platform/x86/simatic-ipc.c
+++ b/drivers/platform/x86/simatic-ipc.c
@@ -46,7 +46,8 @@ static struct {
{SIMATIC_IPC_IPC427D, SIMATIC_IPC_DEVICE_427E, SIMATIC_IPC_DEVICE_NONE},
{SIMATIC_IPC_IPC427E, SIMATIC_IPC_DEVICE_427E, SIMATIC_IPC_DEVICE_427E},
{SIMATIC_IPC_IPC477E, SIMATIC_IPC_DEVICE_NONE, SIMATIC_IPC_DEVICE_427E},
- {SIMATIC_IPC_IPC427G, SIMATIC_IPC_DEVICE_227G, SIMATIC_IPC_DEVICE_227G},
+ {SIMATIC_IPC_IPCBX_39A, SIMATIC_IPC_DEVICE_227G, SIMATIC_IPC_DEVICE_227G},
+ {SIMATIC_IPC_IPCPX_39A, SIMATIC_IPC_DEVICE_NONE, SIMATIC_IPC_DEVICE_227G},
};
static int register_platform_devices(u32 station_id)
diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index 7156ae2ad196..537d6a2d0781 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -1887,14 +1887,21 @@ static int sony_nc_kbd_backlight_setup(struct platform_device *pd,
break;
}
- ret = sony_call_snc_handle(handle, probe_base, &result);
- if (ret)
- return ret;
+ /*
+ * Only probe if there is a separate probe_base, otherwise the probe call
+ * is equivalent to __sony_nc_kbd_backlight_mode_set(0), resulting in
+ * the keyboard backlight being turned off.
+ */
+ if (probe_base) {
+ ret = sony_call_snc_handle(handle, probe_base, &result);
+ if (ret)
+ return ret;
- if ((handle == 0x0137 && !(result & 0x02)) ||
- !(result & 0x01)) {
- dprintk("no backlight keyboard found\n");
- return 0;
+ if ((handle == 0x0137 && !(result & 0x02)) ||
+ !(result & 0x01)) {
+ dprintk("no backlight keyboard found\n");
+ return 0;
+ }
}
kbdbl_ctl = kzalloc(sizeof(*kbdbl_ctl), GFP_KERNEL);
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 1195293b22fd..32c10457399e 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -5563,7 +5563,7 @@ static int light_sysfs_set(struct led_classdev *led_cdev,
static enum led_brightness light_sysfs_get(struct led_classdev *led_cdev)
{
- return (light_get_status() == 1) ? LED_FULL : LED_OFF;
+ return (light_get_status() == 1) ? LED_ON : LED_OFF;
}
static struct tpacpi_led_classdev tpacpi_led_thinklight = {
@@ -10311,9 +10311,11 @@ static DEFINE_MUTEX(dytc_mutex);
static int dytc_capabilities;
static bool dytc_mmc_get_available;
-static int convert_dytc_to_profile(int dytcmode, enum platform_profile_option *profile)
+static int convert_dytc_to_profile(int funcmode, int dytcmode,
+ enum platform_profile_option *profile)
{
- if (dytc_capabilities & BIT(DYTC_FC_MMC)) {
+ switch (funcmode) {
+ case DYTC_FUNCTION_MMC:
switch (dytcmode) {
case DYTC_MODE_MMC_LOWPOWER:
*profile = PLATFORM_PROFILE_LOW_POWER;
@@ -10329,8 +10331,7 @@ static int convert_dytc_to_profile(int dytcmode, enum platform_profile_option *p
return -EINVAL;
}
return 0;
- }
- if (dytc_capabilities & BIT(DYTC_FC_PSC)) {
+ case DYTC_FUNCTION_PSC:
switch (dytcmode) {
case DYTC_MODE_PSC_LOWPOWER:
*profile = PLATFORM_PROFILE_LOW_POWER;
@@ -10344,6 +10345,14 @@ static int convert_dytc_to_profile(int dytcmode, enum platform_profile_option *p
default: /* Unknown mode */
return -EINVAL;
}
+ return 0;
+ case DYTC_FUNCTION_AMT:
+ /* For now return balanced. It's the closest we have to 'auto' */
+ *profile = PLATFORM_PROFILE_BALANCED;
+ return 0;
+ default:
+ /* Unknown function */
+ return -EOPNOTSUPP;
}
return 0;
}
@@ -10487,11 +10496,11 @@ static int dytc_profile_set(struct platform_profile_handler *pprof,
if (err)
goto unlock;
}
- }
- if (dytc_capabilities & BIT(DYTC_FC_PSC)) {
+ } else if (dytc_capabilities & BIT(DYTC_FC_PSC)) {
err = dytc_command(DYTC_SET_COMMAND(DYTC_FUNCTION_PSC, perfmode, 1), &output);
if (err)
goto unlock;
+
/* system supports AMT, activate it when on balanced */
if (dytc_capabilities & BIT(DYTC_FC_AMT))
dytc_control_amt(profile == PLATFORM_PROFILE_BALANCED);
@@ -10507,7 +10516,7 @@ static void dytc_profile_refresh(void)
{
enum platform_profile_option profile;
int output, err = 0;
- int perfmode;
+ int perfmode, funcmode;
mutex_lock(&dytc_mutex);
if (dytc_capabilities & BIT(DYTC_FC_MMC)) {
@@ -10515,15 +10524,18 @@ static void dytc_profile_refresh(void)
err = dytc_command(DYTC_CMD_MMC_GET, &output);
else
err = dytc_cql_command(DYTC_CMD_GET, &output);
- } else if (dytc_capabilities & BIT(DYTC_FC_PSC))
+ funcmode = DYTC_FUNCTION_MMC;
+ } else if (dytc_capabilities & BIT(DYTC_FC_PSC)) {
err = dytc_command(DYTC_CMD_GET, &output);
-
+ /* Check if we are PSC mode, or have AMT enabled */
+ funcmode = (output >> DYTC_GET_FUNCTION_BIT) & 0xF;
+ }
mutex_unlock(&dytc_mutex);
if (err)
return;
perfmode = (output >> DYTC_GET_MODE_BIT) & 0xF;
- convert_dytc_to_profile(perfmode, &profile);
+ convert_dytc_to_profile(funcmode, perfmode, &profile);
if (profile != dytc_current_profile) {
dytc_current_profile = profile;
platform_profile_notify();
diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c
index baae3120efd0..13802a3c3591 100644
--- a/drivers/platform/x86/touchscreen_dmi.c
+++ b/drivers/platform/x86/touchscreen_dmi.c
@@ -264,6 +264,23 @@ static const struct ts_dmi_data connect_tablet9_data = {
.properties = connect_tablet9_props,
};
+static const struct property_entry csl_panther_tab_hd_props[] = {
+ PROPERTY_ENTRY_U32("touchscreen-min-x", 1),
+ PROPERTY_ENTRY_U32("touchscreen-min-y", 20),
+ PROPERTY_ENTRY_U32("touchscreen-size-x", 1980),
+ PROPERTY_ENTRY_U32("touchscreen-size-y", 1526),
+ PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
+ PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
+ PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-csl-panther-tab-hd.fw"),
+ PROPERTY_ENTRY_U32("silead,max-fingers", 10),
+ { }
+};
+
+static const struct ts_dmi_data csl_panther_tab_hd_data = {
+ .acpi_name = "MSSL1680:00",
+ .properties = csl_panther_tab_hd_props,
+};
+
static const struct property_entry cube_iwork8_air_props[] = {
PROPERTY_ENTRY_U32("touchscreen-min-x", 1),
PROPERTY_ENTRY_U32("touchscreen-min-y", 3),
@@ -1081,6 +1098,15 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
},
},
{
+ /* Chuwi Vi8 (CWI501) */
+ .driver_data = (void *)&chuwi_vi8_data,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Insyde"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "i86"),
+ DMI_MATCH(DMI_BIOS_VERSION, "CHUWI.W86JLBNR01"),
+ },
+ },
+ {
/* Chuwi Vi8 (CWI506) */
.driver_data = (void *)&chuwi_vi8_data,
.matches = {
@@ -1125,6 +1151,14 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
},
},
{
+ /* CSL Panther Tab HD */
+ .driver_data = (void *)&csl_panther_tab_hd_data,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "CSL Computer GmbH & Co. KG"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "CSL Panther Tab HD"),
+ },
+ },
+ {
/* CUBE iwork8 Air */
.driver_data = (void *)&cube_iwork8_air_data,
.matches = {
diff --git a/drivers/regulator/da9211-regulator.c b/drivers/regulator/da9211-regulator.c
index e01b32d1fa17..00828f5baa97 100644
--- a/drivers/regulator/da9211-regulator.c
+++ b/drivers/regulator/da9211-regulator.c
@@ -498,6 +498,12 @@ static int da9211_i2c_probe(struct i2c_client *i2c)
chip->chip_irq = i2c->irq;
+ ret = da9211_regulator_init(chip);
+ if (ret < 0) {
+ dev_err(chip->dev, "Failed to initialize regulator: %d\n", ret);
+ return ret;
+ }
+
if (chip->chip_irq != 0) {
ret = devm_request_threaded_irq(chip->dev, chip->chip_irq, NULL,
da9211_irq_handler,
@@ -512,11 +518,6 @@ static int da9211_i2c_probe(struct i2c_client *i2c)
dev_warn(chip->dev, "No IRQ configured\n");
}
- ret = da9211_regulator_init(chip);
-
- if (ret < 0)
- dev_err(chip->dev, "Failed to initialize regulator: %d\n", ret);
-
return ret;
}
diff --git a/drivers/regulator/qcom-rpmh-regulator.c b/drivers/regulator/qcom-rpmh-regulator.c
index 43b5b9377714..ae6021390143 100644
--- a/drivers/regulator/qcom-rpmh-regulator.c
+++ b/drivers/regulator/qcom-rpmh-regulator.c
@@ -1016,7 +1016,7 @@ static const struct rpmh_vreg_init_data pm8550_vreg_data[] = {
RPMH_VREG("ldo8", "ldo%s8", &pmic5_pldo_lv, "vdd-l8-l9"),
RPMH_VREG("ldo9", "ldo%s9", &pmic5_pldo, "vdd-l8-l9"),
RPMH_VREG("ldo10", "ldo%s10", &pmic5_nldo, "vdd-l1-l4-l10"),
- RPMH_VREG("ldo11", "ldo%s11", &pmic5_pldo, "vdd-l11"),
+ RPMH_VREG("ldo11", "ldo%s11", &pmic5_nldo, "vdd-l11"),
RPMH_VREG("ldo12", "ldo%s12", &pmic5_pldo, "vdd-l12"),
RPMH_VREG("ldo13", "ldo%s13", &pmic5_pldo, "vdd-l2-l13-l14"),
RPMH_VREG("ldo14", "ldo%s14", &pmic5_pldo, "vdd-l2-l13-l14"),
diff --git a/drivers/reset/Kconfig b/drivers/reset/Kconfig
index de176c2fbad9..2a52c990d4fe 100644
--- a/drivers/reset/Kconfig
+++ b/drivers/reset/Kconfig
@@ -257,7 +257,7 @@ config RESET_SUNXI
config RESET_TI_SCI
tristate "TI System Control Interface (TI-SCI) reset driver"
- depends on TI_SCI_PROTOCOL || COMPILE_TEST
+ depends on TI_SCI_PROTOCOL || (COMPILE_TEST && TI_SCI_PROTOCOL=n)
help
This enables the reset driver support over TI System Control Interface
available on some new TI's SoCs. If you wish to use reset resources
diff --git a/drivers/reset/reset-uniphier-glue.c b/drivers/reset/reset-uniphier-glue.c
index 146fd5d45e99..15abac9fc72c 100644
--- a/drivers/reset/reset-uniphier-glue.c
+++ b/drivers/reset/reset-uniphier-glue.c
@@ -47,7 +47,6 @@ static int uniphier_glue_reset_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
struct uniphier_glue_reset_priv *priv;
struct resource *res;
- resource_size_t size;
int i, ret;
priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
@@ -60,7 +59,6 @@ static int uniphier_glue_reset_probe(struct platform_device *pdev)
return -EINVAL;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- size = resource_size(res);
priv->rdata.membase = devm_ioremap_resource(dev, res);
if (IS_ERR(priv->rdata.membase))
return PTR_ERR(priv->rdata.membase);
@@ -96,7 +94,7 @@ static int uniphier_glue_reset_probe(struct platform_device *pdev)
spin_lock_init(&priv->rdata.lock);
priv->rdata.rcdev.owner = THIS_MODULE;
- priv->rdata.rcdev.nr_resets = size * BITS_PER_BYTE;
+ priv->rdata.rcdev.nr_resets = resource_size(res) * BITS_PER_BYTE;
priv->rdata.rcdev.ops = &reset_simple_ops;
priv->rdata.rcdev.of_node = dev->of_node;
priv->rdata.active_low = true;
diff --git a/drivers/rtc/rtc-efi.c b/drivers/rtc/rtc-efi.c
index e991cccdb6e9..1e8bc6cc1e12 100644
--- a/drivers/rtc/rtc-efi.c
+++ b/drivers/rtc/rtc-efi.c
@@ -188,9 +188,10 @@ static int efi_set_time(struct device *dev, struct rtc_time *tm)
static int efi_procfs(struct device *dev, struct seq_file *seq)
{
- efi_time_t eft, alm;
- efi_time_cap_t cap;
- efi_bool_t enabled, pending;
+ efi_time_t eft, alm;
+ efi_time_cap_t cap;
+ efi_bool_t enabled, pending;
+ struct rtc_device *rtc = dev_get_drvdata(dev);
memset(&eft, 0, sizeof(eft));
memset(&alm, 0, sizeof(alm));
@@ -213,23 +214,25 @@ static int efi_procfs(struct device *dev, struct seq_file *seq)
/* XXX fixme: convert to string? */
seq_printf(seq, "Timezone\t: %u\n", eft.timezone);
- seq_printf(seq,
- "Alarm Time\t: %u:%u:%u.%09u\n"
- "Alarm Date\t: %u-%u-%u\n"
- "Alarm Daylight\t: %u\n"
- "Enabled\t\t: %s\n"
- "Pending\t\t: %s\n",
- alm.hour, alm.minute, alm.second, alm.nanosecond,
- alm.year, alm.month, alm.day,
- alm.daylight,
- enabled == 1 ? "yes" : "no",
- pending == 1 ? "yes" : "no");
-
- if (eft.timezone == EFI_UNSPECIFIED_TIMEZONE)
- seq_puts(seq, "Timezone\t: unspecified\n");
- else
- /* XXX fixme: convert to string? */
- seq_printf(seq, "Timezone\t: %u\n", alm.timezone);
+ if (test_bit(RTC_FEATURE_ALARM, rtc->features)) {
+ seq_printf(seq,
+ "Alarm Time\t: %u:%u:%u.%09u\n"
+ "Alarm Date\t: %u-%u-%u\n"
+ "Alarm Daylight\t: %u\n"
+ "Enabled\t\t: %s\n"
+ "Pending\t\t: %s\n",
+ alm.hour, alm.minute, alm.second, alm.nanosecond,
+ alm.year, alm.month, alm.day,
+ alm.daylight,
+ enabled == 1 ? "yes" : "no",
+ pending == 1 ? "yes" : "no");
+
+ if (eft.timezone == EFI_UNSPECIFIED_TIMEZONE)
+ seq_puts(seq, "Timezone\t: unspecified\n");
+ else
+ /* XXX fixme: convert to string? */
+ seq_printf(seq, "Timezone\t: %u\n", alm.timezone);
+ }
/*
* now prints the capabilities
@@ -269,7 +272,10 @@ static int __init efi_rtc_probe(struct platform_device *dev)
rtc->ops = &efi_rtc_ops;
clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, rtc->features);
- set_bit(RTC_FEATURE_ALARM_WAKEUP_ONLY, rtc->features);
+ if (efi_rt_services_supported(EFI_RT_SUPPORTED_WAKEUP_SERVICES))
+ set_bit(RTC_FEATURE_ALARM_WAKEUP_ONLY, rtc->features);
+ else
+ clear_bit(RTC_FEATURE_ALARM, rtc->features);
device_init_wakeup(&dev->dev, true);
diff --git a/drivers/rtc/rtc-sunplus.c b/drivers/rtc/rtc-sunplus.c
index e8e2ab1103fc..4b578e4d44f6 100644
--- a/drivers/rtc/rtc-sunplus.c
+++ b/drivers/rtc/rtc-sunplus.c
@@ -240,8 +240,8 @@ static int sp_rtc_probe(struct platform_device *plat_dev)
if (IS_ERR(sp_rtc->reg_base))
return dev_err_probe(&plat_dev->dev, PTR_ERR(sp_rtc->reg_base),
"%s devm_ioremap_resource fail\n", RTC_REG_NAME);
- dev_dbg(&plat_dev->dev, "res = 0x%x, reg_base = 0x%lx\n",
- sp_rtc->res->start, (unsigned long)sp_rtc->reg_base);
+ dev_dbg(&plat_dev->dev, "res = %pR, reg_base = %p\n",
+ sp_rtc->res, sp_rtc->reg_base);
sp_rtc->irq = platform_get_irq(plat_dev, 0);
if (sp_rtc->irq < 0)
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index b392b9f5482e..c0f85ffb2b62 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -865,6 +865,8 @@ dcssblk_submit_bio(struct bio *bio)
unsigned long bytes_done;
bio = bio_split_to_limits(bio);
+ if (!bio)
+ return;
bytes_done = 0;
dev_info = bio->bi_bdev->bd_disk->private_data;
diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c
index 406be169173c..d1adc4b83193 100644
--- a/drivers/s390/net/qeth_core_sys.c
+++ b/drivers/s390/net/qeth_core_sys.c
@@ -410,13 +410,13 @@ static ssize_t qeth_dev_isolation_show(struct device *dev,
switch (card->options.isolation) {
case ISOLATION_MODE_NONE:
- return snprintf(buf, 6, "%s\n", ATTR_QETH_ISOLATION_NONE);
+ return sysfs_emit(buf, "%s\n", ATTR_QETH_ISOLATION_NONE);
case ISOLATION_MODE_FWD:
- return snprintf(buf, 9, "%s\n", ATTR_QETH_ISOLATION_FWD);
+ return sysfs_emit(buf, "%s\n", ATTR_QETH_ISOLATION_FWD);
case ISOLATION_MODE_DROP:
- return snprintf(buf, 6, "%s\n", ATTR_QETH_ISOLATION_DROP);
+ return sysfs_emit(buf, "%s\n", ATTR_QETH_ISOLATION_DROP);
default:
- return snprintf(buf, 5, "%s\n", "N/A");
+ return sysfs_emit(buf, "%s\n", "N/A");
}
}
@@ -500,9 +500,9 @@ static ssize_t qeth_hw_trap_show(struct device *dev,
struct qeth_card *card = dev_get_drvdata(dev);
if (card->info.hwtrap)
- return snprintf(buf, 5, "arm\n");
+ return sysfs_emit(buf, "arm\n");
else
- return snprintf(buf, 8, "disarm\n");
+ return sysfs_emit(buf, "disarm\n");
}
static ssize_t qeth_hw_trap_store(struct device *dev,
diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index 49cc18a87473..29a2865b8e2e 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -981,6 +981,9 @@ queue_rtpg:
*
* Returns true if and only if alua_rtpg_work() will be called asynchronously.
* That function is responsible for calling @qdata->fn().
+ *
+ * Context: may be called from atomic context (alua_check()) only if the caller
+ * holds an sdev reference.
*/
static bool alua_rtpg_queue(struct alua_port_group *pg,
struct scsi_device *sdev,
@@ -989,8 +992,6 @@ static bool alua_rtpg_queue(struct alua_port_group *pg,
int start_queue = 0;
unsigned long flags;
- might_sleep();
-
if (WARN_ON_ONCE(!pg) || scsi_device_get(sdev))
return false;
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
index 41ba22f6c7f0..8c038ccf1c09 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -162,7 +162,7 @@ static void hisi_sas_slot_index_clear(struct hisi_hba *hisi_hba, int slot_idx)
static void hisi_sas_slot_index_free(struct hisi_hba *hisi_hba, int slot_idx)
{
if (hisi_hba->hw->slot_index_alloc ||
- slot_idx >= HISI_SAS_UNRESERVED_IPTT) {
+ slot_idx < HISI_SAS_RESERVED_IPTT) {
spin_lock(&hisi_hba->lock);
hisi_sas_slot_index_clear(hisi_hba, slot_idx);
spin_unlock(&hisi_hba->lock);
@@ -704,7 +704,7 @@ static int hisi_sas_init_device(struct domain_device *device)
int_to_scsilun(0, &lun);
while (retry-- > 0) {
- rc = sas_clear_task_set(device, lun.scsi_lun);
+ rc = sas_abort_task_set(device, lun.scsi_lun);
if (rc == TMF_RESP_FUNC_COMPLETE) {
hisi_sas_release_task(hisi_hba, device);
break;
@@ -1316,7 +1316,7 @@ static void hisi_sas_refresh_port_id(struct hisi_hba *hisi_hba)
device->linkrate = phy->sas_phy.linkrate;
hisi_hba->hw->setup_itct(hisi_hba, sas_dev);
- } else
+ } else if (!port->port_attached)
port->id = 0xff;
}
}
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 4dbf51e2623a..f6da34850af9 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -5850,7 +5850,7 @@ static int hpsa_scsi_host_alloc(struct ctlr_info *h)
{
struct Scsi_Host *sh;
- sh = scsi_host_alloc(&hpsa_driver_template, sizeof(h));
+ sh = scsi_host_alloc(&hpsa_driver_template, sizeof(struct ctlr_info));
if (sh == NULL) {
dev_err(&h->pdev->dev, "scsi_host_alloc failed\n");
return -ENOMEM;
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 1d1cf641937c..0454d94e8cf0 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -849,7 +849,7 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost,
enum iscsi_host_param param, char *buf)
{
struct iscsi_sw_tcp_host *tcp_sw_host = iscsi_host_priv(shost);
- struct iscsi_session *session = tcp_sw_host->session;
+ struct iscsi_session *session;
struct iscsi_conn *conn;
struct iscsi_tcp_conn *tcp_conn;
struct iscsi_sw_tcp_conn *tcp_sw_conn;
@@ -859,6 +859,7 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost,
switch (param) {
case ISCSI_HOST_PARAM_IPADDRESS:
+ session = tcp_sw_host->session;
if (!session)
return -ENOTCONN;
@@ -959,11 +960,13 @@ iscsi_sw_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max,
if (!cls_session)
goto remove_host;
session = cls_session->dd_data;
- tcp_sw_host = iscsi_host_priv(shost);
- tcp_sw_host->session = session;
if (iscsi_tcp_r2tpool_alloc(session))
goto remove_session;
+
+ /* We are now fully setup so expose the session to sysfs. */
+ tcp_sw_host = iscsi_host_priv(shost);
+ tcp_sw_host->session = session;
return cls_session;
remove_session:
@@ -983,10 +986,17 @@ static void iscsi_sw_tcp_session_destroy(struct iscsi_cls_session *cls_session)
if (WARN_ON_ONCE(session->leadconn))
return;
+ iscsi_session_remove(cls_session);
+ /*
+ * Our get_host_param needs to access the session, so remove the
+ * host from sysfs before freeing the session to make sure userspace
+ * is no longer accessing the callout.
+ */
+ iscsi_host_remove(shost, false);
+
iscsi_tcp_r2tpool_free(cls_session->dd_data);
- iscsi_session_teardown(cls_session);
- iscsi_host_remove(shost, false);
+ iscsi_session_free(cls_session);
iscsi_host_free(shost);
}
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index ef2fc860257e..127f3d7f19dc 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -3104,17 +3104,32 @@ dec_session_count:
}
EXPORT_SYMBOL_GPL(iscsi_session_setup);
-/**
- * iscsi_session_teardown - destroy session, host, and cls_session
- * @cls_session: iscsi session
+/*
+ * issi_session_remove - Remove session from iSCSI class.
*/
-void iscsi_session_teardown(struct iscsi_cls_session *cls_session)
+void iscsi_session_remove(struct iscsi_cls_session *cls_session)
{
struct iscsi_session *session = cls_session->dd_data;
- struct module *owner = cls_session->transport->owner;
struct Scsi_Host *shost = session->host;
iscsi_remove_session(cls_session);
+ /*
+ * host removal only has to wait for its children to be removed from
+ * sysfs, and iscsi_tcp needs to do iscsi_host_remove before freeing
+ * the session, so drop the session count here.
+ */
+ iscsi_host_dec_session_cnt(shost);
+}
+EXPORT_SYMBOL_GPL(iscsi_session_remove);
+
+/**
+ * iscsi_session_free - Free iscsi session and it's resources
+ * @cls_session: iscsi session
+ */
+void iscsi_session_free(struct iscsi_cls_session *cls_session)
+{
+ struct iscsi_session *session = cls_session->dd_data;
+ struct module *owner = cls_session->transport->owner;
iscsi_pool_free(&session->cmdpool);
kfree(session->password);
@@ -3132,10 +3147,19 @@ void iscsi_session_teardown(struct iscsi_cls_session *cls_session)
kfree(session->discovery_parent_type);
iscsi_free_session(cls_session);
-
- iscsi_host_dec_session_cnt(shost);
module_put(owner);
}
+EXPORT_SYMBOL_GPL(iscsi_session_free);
+
+/**
+ * iscsi_session_teardown - destroy session and cls_session
+ * @cls_session: iscsi session
+ */
+void iscsi_session_teardown(struct iscsi_cls_session *cls_session)
+{
+ iscsi_session_remove(cls_session);
+ iscsi_session_free(cls_session);
+}
EXPORT_SYMBOL_GPL(iscsi_session_teardown);
/**
diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index 1ccce706167a..5e80225b5308 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -889,7 +889,9 @@ void sas_ata_device_link_abort(struct domain_device *device, bool force_reset)
{
struct ata_port *ap = device->sata_dev.ap;
struct ata_link *link = &ap->link;
+ unsigned long flags;
+ spin_lock_irqsave(ap->lock, flags);
device->sata_dev.fis[2] = ATA_ERR | ATA_DRDY; /* tf status */
device->sata_dev.fis[3] = ATA_ABORTED; /* tf error */
@@ -897,6 +899,7 @@ void sas_ata_device_link_abort(struct domain_device *device, bool force_reset)
if (force_reset)
link->eh_info.action |= ATA_EH_RESET;
ata_link_abort(link);
+ spin_unlock_irqrestore(ap->lock, flags);
}
EXPORT_SYMBOL_GPL(sas_ata_device_link_abort);
diff --git a/drivers/scsi/mpi3mr/Makefile b/drivers/scsi/mpi3mr/Makefile
index ef86ca46646b..3bf8cf34e1c3 100644
--- a/drivers/scsi/mpi3mr/Makefile
+++ b/drivers/scsi/mpi3mr/Makefile
@@ -1,5 +1,5 @@
# mpi3mr makefile
-obj-m += mpi3mr.o
+obj-$(CONFIG_SCSI_MPI3MR) += mpi3mr.o
mpi3mr-y += mpi3mr_os.o \
mpi3mr_fw.o \
mpi3mr_app.o \
diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c
index 0c4aabaefdcc..286a44506578 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_fw.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c
@@ -3633,8 +3633,7 @@ int mpi3mr_setup_resources(struct mpi3mr_ioc *mrioc)
int i, retval = 0, capb = 0;
u16 message_control;
u64 dma_mask = mrioc->dma_mask ? mrioc->dma_mask :
- (((dma_get_required_mask(&pdev->dev) > DMA_BIT_MASK(32)) &&
- (sizeof(dma_addr_t) > 4)) ? DMA_BIT_MASK(64) : DMA_BIT_MASK(32));
+ ((sizeof(dma_addr_t) > 4) ? DMA_BIT_MASK(64) : DMA_BIT_MASK(32));
if (pci_enable_device_mem(pdev)) {
ioc_err(mrioc, "pci_enable_device_mem: failed\n");
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index 4e981ccaac41..69061545d9d2 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -2992,8 +2992,7 @@ _base_config_dma_addressing(struct MPT3SAS_ADAPTER *ioc, struct pci_dev *pdev)
struct sysinfo s;
u64 coherent_dma_mask, dma_mask;
- if (ioc->is_mcpu_endpoint || sizeof(dma_addr_t) == 4 ||
- dma_get_required_mask(&pdev->dev) <= DMA_BIT_MASK(32)) {
+ if (ioc->is_mcpu_endpoint || sizeof(dma_addr_t) == 4) {
ioc->dma_mask = 32;
coherent_dma_mask = dma_mask = DMA_BIT_MASK(32);
/* Set 63 bit DMA mask for all SAS3 and SAS35 controllers */
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 1426b9b03612..9feb0323bc44 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -588,8 +588,6 @@ void scsi_device_put(struct scsi_device *sdev)
{
struct module *mod = sdev->host->hostt->module;
- might_sleep();
-
put_device(&sdev->sdev_gendev);
module_put(mod);
}
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index cc6953809a24..8553277effb3 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -1511,8 +1511,6 @@ static int inquiry_vpd_b0(unsigned char *arr)
put_unaligned_be64(sdebug_write_same_length, &arr[32]);
return 0x3c; /* Mandatory page length for Logical Block Provisioning */
-
- return sizeof(vpdb0_data);
}
/* Block device characteristics VPD page (SBC-3) */
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index a7960ad2d386..2aa2c2aee6e7 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -231,6 +231,11 @@ scsi_abort_command(struct scsi_cmnd *scmd)
struct Scsi_Host *shost = sdev->host;
unsigned long flags;
+ if (!shost->hostt->eh_abort_handler) {
+ /* No abort handler, fail command directly */
+ return FAILED;
+ }
+
if (scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED) {
/*
* Retry after abort failed, escalate to next level.
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 7a6904a3928e..f9b18fdc7b3c 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -1232,8 +1232,7 @@ static int scsi_probe_and_add_lun(struct scsi_target *starget,
* that no LUN is present, so don't add sdev in these cases.
* Two specific examples are:
* 1) NetApp targets: return PQ=1, PDT=0x1f
- * 2) IBM/2145 targets: return PQ=1, PDT=0
- * 3) USB UFI: returns PDT=0x1f, with the PQ bits being "reserved"
+ * 2) USB UFI: returns PDT=0x1f, with the PQ bits being "reserved"
* in the UFI 1.0 spec (we cannot rely on reserved bits).
*
* References:
@@ -1247,8 +1246,8 @@ static int scsi_probe_and_add_lun(struct scsi_target *starget,
* PDT=00h Direct-access device (floppy)
* PDT=1Fh none (no FDD connected to the requested logical unit)
*/
- if (((result[0] >> 5) == 1 ||
- (starget->pdt_1f_for_no_lun && (result[0] & 0x1f) == 0x1f)) &&
+ if (((result[0] >> 5) == 1 || starget->pdt_1f_for_no_lun) &&
+ (result[0] & 0x1f) == 0x1f &&
!scsi_is_wlun(lun)) {
SCSI_LOG_SCAN_BUS(3, sdev_printk(KERN_INFO, sdev,
"scsi scan: peripheral device type"
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 981d1bab2120..8ef9a5494340 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -451,6 +451,8 @@ static void scsi_device_dev_release(struct device *dev)
struct scsi_vpd *vpd_pgb0 = NULL, *vpd_pgb1 = NULL, *vpd_pgb2 = NULL;
unsigned long flags;
+ might_sleep();
+
scsi_dh_release_device(sdev);
parent = sdev->sdev_gendev.parent;
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 13cfd3e317cc..b9b97300e3b3 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -1677,6 +1677,13 @@ static const char *iscsi_session_state_name(int state)
return name;
}
+static char *iscsi_session_target_state_name[] = {
+ [ISCSI_SESSION_TARGET_UNBOUND] = "UNBOUND",
+ [ISCSI_SESSION_TARGET_ALLOCATED] = "ALLOCATED",
+ [ISCSI_SESSION_TARGET_SCANNED] = "SCANNED",
+ [ISCSI_SESSION_TARGET_UNBINDING] = "UNBINDING",
+};
+
int iscsi_session_chkready(struct iscsi_cls_session *session)
{
int err;
@@ -1786,9 +1793,13 @@ static int iscsi_user_scan_session(struct device *dev, void *data)
if ((scan_data->channel == SCAN_WILD_CARD ||
scan_data->channel == 0) &&
(scan_data->id == SCAN_WILD_CARD ||
- scan_data->id == id))
+ scan_data->id == id)) {
scsi_scan_target(&session->dev, 0, id,
scan_data->lun, scan_data->rescan);
+ spin_lock_irqsave(&session->lock, flags);
+ session->target_state = ISCSI_SESSION_TARGET_SCANNED;
+ spin_unlock_irqrestore(&session->lock, flags);
+ }
}
user_scan_exit:
@@ -1961,31 +1972,41 @@ static void __iscsi_unbind_session(struct work_struct *work)
struct iscsi_cls_host *ihost = shost->shost_data;
unsigned long flags;
unsigned int target_id;
+ bool remove_target = true;
ISCSI_DBG_TRANS_SESSION(session, "Unbinding session\n");
/* Prevent new scans and make sure scanning is not in progress */
mutex_lock(&ihost->mutex);
spin_lock_irqsave(&session->lock, flags);
- if (session->target_id == ISCSI_MAX_TARGET) {
+ if (session->target_state == ISCSI_SESSION_TARGET_ALLOCATED) {
+ remove_target = false;
+ } else if (session->target_state != ISCSI_SESSION_TARGET_SCANNED) {
spin_unlock_irqrestore(&session->lock, flags);
mutex_unlock(&ihost->mutex);
- goto unbind_session_exit;
+ ISCSI_DBG_TRANS_SESSION(session,
+ "Skipping target unbinding: Session is unbound/unbinding.\n");
+ return;
}
+ session->target_state = ISCSI_SESSION_TARGET_UNBINDING;
target_id = session->target_id;
session->target_id = ISCSI_MAX_TARGET;
spin_unlock_irqrestore(&session->lock, flags);
mutex_unlock(&ihost->mutex);
- scsi_remove_target(&session->dev);
+ if (remove_target)
+ scsi_remove_target(&session->dev);
if (session->ida_used)
ida_free(&iscsi_sess_ida, target_id);
-unbind_session_exit:
iscsi_session_event(session, ISCSI_KEVENT_UNBIND_SESSION);
ISCSI_DBG_TRANS_SESSION(session, "Completed target removal\n");
+
+ spin_lock_irqsave(&session->lock, flags);
+ session->target_state = ISCSI_SESSION_TARGET_UNBOUND;
+ spin_unlock_irqrestore(&session->lock, flags);
}
static void __iscsi_destroy_session(struct work_struct *work)
@@ -2062,6 +2083,9 @@ int iscsi_add_session(struct iscsi_cls_session *session, unsigned int target_id)
session->ida_used = true;
} else
session->target_id = target_id;
+ spin_lock_irqsave(&session->lock, flags);
+ session->target_state = ISCSI_SESSION_TARGET_ALLOCATED;
+ spin_unlock_irqrestore(&session->lock, flags);
dev_set_name(&session->dev, "session%u", session->sid);
err = device_add(&session->dev);
@@ -4370,6 +4394,19 @@ iscsi_session_attr(discovery_parent_idx, ISCSI_PARAM_DISCOVERY_PARENT_IDX, 0);
iscsi_session_attr(discovery_parent_type, ISCSI_PARAM_DISCOVERY_PARENT_TYPE, 0);
static ssize_t
+show_priv_session_target_state(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent);
+
+ return sysfs_emit(buf, "%s\n",
+ iscsi_session_target_state_name[session->target_state]);
+}
+
+static ISCSI_CLASS_ATTR(priv_sess, target_state, S_IRUGO,
+ show_priv_session_target_state, NULL);
+
+static ssize_t
show_priv_session_state(struct device *dev, struct device_attribute *attr,
char *buf)
{
@@ -4471,6 +4508,7 @@ static struct attribute *iscsi_session_attrs[] = {
&dev_attr_sess_boot_target.attr,
&dev_attr_priv_sess_recovery_tmo.attr,
&dev_attr_priv_sess_state.attr,
+ &dev_attr_priv_sess_target_state.attr,
&dev_attr_priv_sess_creator.attr,
&dev_attr_sess_chap_out_idx.attr,
&dev_attr_sess_chap_in_idx.attr,
@@ -4584,6 +4622,8 @@ static umode_t iscsi_session_attr_is_visible(struct kobject *kobj,
return S_IRUGO | S_IWUSR;
else if (attr == &dev_attr_priv_sess_state.attr)
return S_IRUGO;
+ else if (attr == &dev_attr_priv_sess_target_state.attr)
+ return S_IRUGO;
else if (attr == &dev_attr_priv_sess_creator.attr)
return S_IRUGO;
else if (attr == &dev_attr_priv_sess_target_id.attr)
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index d7a84c0bfaeb..22705eb781b0 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -1823,6 +1823,9 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
ret = storvsc_do_io(dev, cmd_request, get_cpu());
put_cpu();
+ if (ret)
+ scsi_dma_unmap(scmnd);
+
if (ret == -EAGAIN) {
/* no more space */
ret = SCSI_MLQUEUE_DEVICE_BUSY;
diff --git a/drivers/scsi/xen-scsifront.c b/drivers/scsi/xen-scsifront.c
index 66b316d173b0..71a3bb83984c 100644
--- a/drivers/scsi/xen-scsifront.c
+++ b/drivers/scsi/xen-scsifront.c
@@ -995,7 +995,7 @@ static int scsifront_suspend(struct xenbus_device *dev)
return err;
}
-static int scsifront_remove(struct xenbus_device *dev)
+static void scsifront_remove(struct xenbus_device *dev)
{
struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev);
@@ -1011,8 +1011,6 @@ static int scsifront_remove(struct xenbus_device *dev)
scsifront_free_ring(info);
scsi_host_put(info->host);
-
- return 0;
}
static void scsifront_disconnect(struct vscsifrnt_info *info)
diff --git a/drivers/soc/imx/imx8mp-blk-ctrl.c b/drivers/soc/imx/imx8mp-blk-ctrl.c
index 0e3b6ba22f94..0f13853901df 100644
--- a/drivers/soc/imx/imx8mp-blk-ctrl.c
+++ b/drivers/soc/imx/imx8mp-blk-ctrl.c
@@ -212,7 +212,7 @@ static void imx8mp_hdmi_blk_ctrl_power_on(struct imx8mp_blk_ctrl *bc,
break;
case IMX8MP_HDMIBLK_PD_LCDIF:
regmap_set_bits(bc->regmap, HDMI_RTX_CLK_CTL0,
- BIT(7) | BIT(16) | BIT(17) | BIT(18) |
+ BIT(16) | BIT(17) | BIT(18) |
BIT(19) | BIT(20));
regmap_set_bits(bc->regmap, HDMI_RTX_CLK_CTL1, BIT(11));
regmap_set_bits(bc->regmap, HDMI_RTX_RESET_CTL0,
@@ -241,6 +241,7 @@ static void imx8mp_hdmi_blk_ctrl_power_on(struct imx8mp_blk_ctrl *bc,
regmap_set_bits(bc->regmap, HDMI_TX_CONTROL0, BIT(1));
break;
case IMX8MP_HDMIBLK_PD_HDMI_TX_PHY:
+ regmap_set_bits(bc->regmap, HDMI_RTX_CLK_CTL0, BIT(7));
regmap_set_bits(bc->regmap, HDMI_RTX_CLK_CTL1, BIT(22) | BIT(24));
regmap_set_bits(bc->regmap, HDMI_RTX_RESET_CTL0, BIT(12));
regmap_clear_bits(bc->regmap, HDMI_TX_CONTROL0, BIT(3));
@@ -270,7 +271,7 @@ static void imx8mp_hdmi_blk_ctrl_power_off(struct imx8mp_blk_ctrl *bc,
BIT(4) | BIT(5) | BIT(6));
regmap_clear_bits(bc->regmap, HDMI_RTX_CLK_CTL1, BIT(11));
regmap_clear_bits(bc->regmap, HDMI_RTX_CLK_CTL0,
- BIT(7) | BIT(16) | BIT(17) | BIT(18) |
+ BIT(16) | BIT(17) | BIT(18) |
BIT(19) | BIT(20));
break;
case IMX8MP_HDMIBLK_PD_PAI:
@@ -298,6 +299,7 @@ static void imx8mp_hdmi_blk_ctrl_power_off(struct imx8mp_blk_ctrl *bc,
case IMX8MP_HDMIBLK_PD_HDMI_TX_PHY:
regmap_set_bits(bc->regmap, HDMI_TX_CONTROL0, BIT(3));
regmap_clear_bits(bc->regmap, HDMI_RTX_RESET_CTL0, BIT(12));
+ regmap_clear_bits(bc->regmap, HDMI_RTX_CLK_CTL0, BIT(7));
regmap_clear_bits(bc->regmap, HDMI_RTX_CLK_CTL1, BIT(22) | BIT(24));
break;
case IMX8MP_HDMIBLK_PD_HDCP:
@@ -590,7 +592,6 @@ static int imx8mp_blk_ctrl_probe(struct platform_device *pdev)
ret = PTR_ERR(domain->power_dev);
goto cleanup_pds;
}
- dev_set_name(domain->power_dev, "%s", data->name);
domain->genpd.name = data->name;
domain->genpd.power_on = imx8mp_blk_ctrl_power_on;
diff --git a/drivers/soc/imx/soc-imx8m.c b/drivers/soc/imx/soc-imx8m.c
index 28144c699b0c..32ed9dc88e45 100644
--- a/drivers/soc/imx/soc-imx8m.c
+++ b/drivers/soc/imx/soc-imx8m.c
@@ -66,8 +66,8 @@ static u32 __init imx8mq_soc_revision(void)
ocotp_base = of_iomap(np, 0);
WARN_ON(!ocotp_base);
clk = of_clk_get_by_name(np, NULL);
- if (!clk) {
- WARN_ON(!clk);
+ if (IS_ERR(clk)) {
+ WARN_ON(IS_ERR(clk));
return 0;
}
diff --git a/drivers/soc/qcom/apr.c b/drivers/soc/qcom/apr.c
index cd44f17dad3d..d51abb462ae5 100644
--- a/drivers/soc/qcom/apr.c
+++ b/drivers/soc/qcom/apr.c
@@ -461,9 +461,10 @@ static int apr_add_device(struct device *dev, struct device_node *np,
goto out;
}
+ /* Protection domain is optional, it does not exist on older platforms */
ret = of_property_read_string_index(np, "qcom,protection-domain",
1, &adev->service_path);
- if (ret < 0) {
+ if (ret < 0 && ret != -EINVAL) {
dev_err(dev, "Failed to read second value of qcom,protection-domain\n");
goto out;
}
diff --git a/drivers/soc/qcom/cpr.c b/drivers/soc/qcom/cpr.c
index e9b854ed1bdf..144ea68e0920 100644
--- a/drivers/soc/qcom/cpr.c
+++ b/drivers/soc/qcom/cpr.c
@@ -1708,12 +1708,16 @@ static int cpr_probe(struct platform_device *pdev)
ret = of_genpd_add_provider_simple(dev->of_node, &drv->pd);
if (ret)
- return ret;
+ goto err_remove_genpd;
platform_set_drvdata(pdev, drv);
cpr_debugfs_init(drv);
return 0;
+
+err_remove_genpd:
+ pm_genpd_remove(&drv->pd);
+ return ret;
}
static int cpr_remove(struct platform_device *pdev)
diff --git a/drivers/spi/spi-cadence-xspi.c b/drivers/spi/spi-cadence-xspi.c
index 520b4cc69cdc..91db3c973167 100644
--- a/drivers/spi/spi-cadence-xspi.c
+++ b/drivers/spi/spi-cadence-xspi.c
@@ -177,7 +177,10 @@
#define CDNS_XSPI_CMD_FLD_DSEQ_CMD_3(op) ( \
FIELD_PREP(CDNS_XSPI_CMD_DSEQ_R3_DCNT_H, \
((op)->data.nbytes >> 16) & 0xffff) | \
- FIELD_PREP(CDNS_XSPI_CMD_DSEQ_R3_NUM_OF_DUMMY, (op)->dummy.nbytes * 8))
+ FIELD_PREP(CDNS_XSPI_CMD_DSEQ_R3_NUM_OF_DUMMY, \
+ (op)->dummy.buswidth != 0 ? \
+ (((op)->dummy.nbytes * 8) / (op)->dummy.buswidth) : \
+ 0))
#define CDNS_XSPI_CMD_FLD_DSEQ_CMD_4(op, chipsel) ( \
FIELD_PREP(CDNS_XSPI_CMD_DSEQ_R4_BANK, chipsel) | \
diff --git a/drivers/spi/spi-dw-core.c b/drivers/spi/spi-dw-core.c
index 99edddf9958b..c3bfb6c84cab 100644
--- a/drivers/spi/spi-dw-core.c
+++ b/drivers/spi/spi-dw-core.c
@@ -366,7 +366,7 @@ static void dw_spi_irq_setup(struct dw_spi *dws)
* will be adjusted at the final stage of the IRQ-based SPI transfer
* execution so not to lose the leftover of the incoming data.
*/
- level = min_t(u16, dws->fifo_len / 2, dws->tx_len);
+ level = min_t(unsigned int, dws->fifo_len / 2, dws->tx_len);
dw_writel(dws, DW_SPI_TXFTLR, level);
dw_writel(dws, DW_SPI_RXFTLR, level - 1);
diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c
index 6de8360e5c2a..9eab6c20dbc5 100644
--- a/drivers/spi/spi-mt65xx.c
+++ b/drivers/spi/spi-mt65xx.c
@@ -1253,6 +1253,11 @@ static int mtk_spi_probe(struct platform_device *pdev)
dev_notice(dev, "SPI dma_set_mask(%d) failed, ret:%d\n",
addr_bits, ret);
+ ret = devm_request_irq(dev, irq, mtk_spi_interrupt,
+ IRQF_TRIGGER_NONE, dev_name(dev), master);
+ if (ret)
+ return dev_err_probe(dev, ret, "failed to register irq\n");
+
pm_runtime_enable(dev);
ret = devm_spi_register_master(dev, master);
@@ -1261,13 +1266,6 @@ static int mtk_spi_probe(struct platform_device *pdev)
return dev_err_probe(dev, ret, "failed to register master\n");
}
- ret = devm_request_irq(dev, irq, mtk_spi_interrupt,
- IRQF_TRIGGER_NONE, dev_name(dev), master);
- if (ret) {
- pm_runtime_disable(dev);
- return dev_err_probe(dev, ret, "failed to register irq\n");
- }
-
return 0;
}
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 3cc7bb4d03de..3f33934f5429 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -2220,11 +2220,26 @@ void spi_flush_queue(struct spi_controller *ctlr)
/*-------------------------------------------------------------------------*/
#if defined(CONFIG_OF)
+static void of_spi_parse_dt_cs_delay(struct device_node *nc,
+ struct spi_delay *delay, const char *prop)
+{
+ u32 value;
+
+ if (!of_property_read_u32(nc, prop, &value)) {
+ if (value > U16_MAX) {
+ delay->value = DIV_ROUND_UP(value, 1000);
+ delay->unit = SPI_DELAY_UNIT_USECS;
+ } else {
+ delay->value = value;
+ delay->unit = SPI_DELAY_UNIT_NSECS;
+ }
+ }
+}
+
static int of_spi_parse_dt(struct spi_controller *ctlr, struct spi_device *spi,
struct device_node *nc)
{
u32 value;
- u16 cs_setup;
int rc;
/* Mode (clock phase/polarity/etc.) */
@@ -2310,10 +2325,8 @@ static int of_spi_parse_dt(struct spi_controller *ctlr, struct spi_device *spi,
if (!of_property_read_u32(nc, "spi-max-frequency", &value))
spi->max_speed_hz = value;
- if (!of_property_read_u16(nc, "spi-cs-setup-ns", &cs_setup)) {
- spi->cs_setup.value = cs_setup;
- spi->cs_setup.unit = SPI_DELAY_UNIT_NSECS;
- }
+ /* Device CS delays */
+ of_spi_parse_dt_cs_delay(nc, &spi->cs_setup, "spi-cs-setup-delay-ns");
return 0;
}
diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c
index 6313e7d0cdf8..a1ea093795cf 100644
--- a/drivers/spi/spidev.c
+++ b/drivers/spi/spidev.c
@@ -68,7 +68,7 @@ static_assert(N_SPI_MINORS > 0 && N_SPI_MINORS <= 256);
struct spidev_data {
dev_t devt;
- spinlock_t spi_lock;
+ struct mutex spi_lock;
struct spi_device *spi;
struct list_head device_entry;
@@ -90,22 +90,32 @@ MODULE_PARM_DESC(bufsiz, "data bytes in biggest supported SPI message");
/*-------------------------------------------------------------------------*/
static ssize_t
+spidev_sync_unlocked(struct spi_device *spi, struct spi_message *message)
+{
+ ssize_t status;
+
+ status = spi_sync(spi, message);
+ if (status == 0)
+ status = message->actual_length;
+
+ return status;
+}
+
+static ssize_t
spidev_sync(struct spidev_data *spidev, struct spi_message *message)
{
- int status;
+ ssize_t status;
struct spi_device *spi;
- spin_lock_irq(&spidev->spi_lock);
+ mutex_lock(&spidev->spi_lock);
spi = spidev->spi;
- spin_unlock_irq(&spidev->spi_lock);
if (spi == NULL)
status = -ESHUTDOWN;
else
- status = spi_sync(spi, message);
+ status = spidev_sync_unlocked(spi, message);
- if (status == 0)
- status = message->actual_length;
+ mutex_unlock(&spidev->spi_lock);
return status;
}
@@ -294,7 +304,7 @@ static int spidev_message(struct spidev_data *spidev,
spi_message_add_tail(k_tmp, &msg);
}
- status = spidev_sync(spidev, &msg);
+ status = spidev_sync_unlocked(spidev->spi, &msg);
if (status < 0)
goto done;
@@ -359,12 +369,12 @@ spidev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
* we issue this ioctl.
*/
spidev = filp->private_data;
- spin_lock_irq(&spidev->spi_lock);
+ mutex_lock(&spidev->spi_lock);
spi = spi_dev_get(spidev->spi);
- spin_unlock_irq(&spidev->spi_lock);
-
- if (spi == NULL)
+ if (spi == NULL) {
+ mutex_unlock(&spidev->spi_lock);
return -ESHUTDOWN;
+ }
/* use the buffer lock here for triple duty:
* - prevent I/O (from us) so calling spi_setup() is safe;
@@ -508,6 +518,7 @@ spidev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
mutex_unlock(&spidev->buf_lock);
spi_dev_put(spi);
+ mutex_unlock(&spidev->spi_lock);
return retval;
}
@@ -529,12 +540,12 @@ spidev_compat_ioc_message(struct file *filp, unsigned int cmd,
* we issue this ioctl.
*/
spidev = filp->private_data;
- spin_lock_irq(&spidev->spi_lock);
+ mutex_lock(&spidev->spi_lock);
spi = spi_dev_get(spidev->spi);
- spin_unlock_irq(&spidev->spi_lock);
-
- if (spi == NULL)
+ if (spi == NULL) {
+ mutex_unlock(&spidev->spi_lock);
return -ESHUTDOWN;
+ }
/* SPI_IOC_MESSAGE needs the buffer locked "normally" */
mutex_lock(&spidev->buf_lock);
@@ -561,6 +572,7 @@ spidev_compat_ioc_message(struct file *filp, unsigned int cmd,
done:
mutex_unlock(&spidev->buf_lock);
spi_dev_put(spi);
+ mutex_unlock(&spidev->spi_lock);
return retval;
}
@@ -601,7 +613,6 @@ static int spidev_open(struct inode *inode, struct file *filp)
if (!spidev->tx_buffer) {
spidev->tx_buffer = kmalloc(bufsiz, GFP_KERNEL);
if (!spidev->tx_buffer) {
- dev_dbg(&spidev->spi->dev, "open/ENOMEM\n");
status = -ENOMEM;
goto err_find_dev;
}
@@ -610,7 +621,6 @@ static int spidev_open(struct inode *inode, struct file *filp)
if (!spidev->rx_buffer) {
spidev->rx_buffer = kmalloc(bufsiz, GFP_KERNEL);
if (!spidev->rx_buffer) {
- dev_dbg(&spidev->spi->dev, "open/ENOMEM\n");
status = -ENOMEM;
goto err_alloc_rx_buf;
}
@@ -640,10 +650,10 @@ static int spidev_release(struct inode *inode, struct file *filp)
spidev = filp->private_data;
filp->private_data = NULL;
- spin_lock_irq(&spidev->spi_lock);
+ mutex_lock(&spidev->spi_lock);
/* ... after we unbound from the underlying device? */
dofree = (spidev->spi == NULL);
- spin_unlock_irq(&spidev->spi_lock);
+ mutex_unlock(&spidev->spi_lock);
/* last close? */
spidev->users--;
@@ -776,7 +786,7 @@ static int spidev_probe(struct spi_device *spi)
/* Initialize the driver data */
spidev->spi = spi;
- spin_lock_init(&spidev->spi_lock);
+ mutex_init(&spidev->spi_lock);
mutex_init(&spidev->buf_lock);
INIT_LIST_HEAD(&spidev->device_entry);
@@ -821,9 +831,9 @@ static void spidev_remove(struct spi_device *spi)
/* prevent new opens */
mutex_lock(&device_list_lock);
/* make sure ops on existing fds can abort cleanly */
- spin_lock_irq(&spidev->spi_lock);
+ mutex_lock(&spidev->spi_lock);
spidev->spi = NULL;
- spin_unlock_irq(&spidev->spi_lock);
+ mutex_unlock(&spidev->spi_lock);
list_del(&spidev->device_entry);
device_destroy(spidev_class, spidev->devt);
diff --git a/drivers/staging/vc04_services/include/linux/raspberrypi/vchiq.h b/drivers/staging/vc04_services/include/linux/raspberrypi/vchiq.h
index db1441c0cc66..690ab7165b2c 100644
--- a/drivers/staging/vc04_services/include/linux/raspberrypi/vchiq.h
+++ b/drivers/staging/vc04_services/include/linux/raspberrypi/vchiq.h
@@ -86,7 +86,7 @@ struct vchiq_service_params_kernel {
struct vchiq_instance;
-extern enum vchiq_status vchiq_initialise(struct vchiq_instance **pinstance);
+extern int vchiq_initialise(struct vchiq_instance **pinstance);
extern enum vchiq_status vchiq_shutdown(struct vchiq_instance *instance);
extern enum vchiq_status vchiq_connect(struct vchiq_instance *instance);
extern enum vchiq_status vchiq_open_service(struct vchiq_instance *instance,
diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h
index 2851ef6b9cd0..cd20eb18f275 100644
--- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h
+++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h
@@ -100,10 +100,10 @@ vchiq_dump_platform_use_state(struct vchiq_state *state);
extern void
vchiq_dump_service_use_state(struct vchiq_state *state);
-extern enum vchiq_status
+extern int
vchiq_use_internal(struct vchiq_state *state, struct vchiq_service *service,
enum USE_TYPE_E use_type);
-extern enum vchiq_status
+extern int
vchiq_release_internal(struct vchiq_state *state,
struct vchiq_service *service);
diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index bac111456fa1..2b95b4550a63 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -73,8 +73,8 @@ static bool __target_check_io_state(struct se_cmd *se_cmd,
{
struct se_session *sess = se_cmd->se_sess;
- assert_spin_locked(&sess->sess_cmd_lock);
- WARN_ON_ONCE(!irqs_disabled());
+ lockdep_assert_held(&sess->sess_cmd_lock);
+
/*
* If command already reached CMD_T_COMPLETE state within
* target_complete_cmd() or CMD_T_FABRIC_STOP due to shutdown,
diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c
index 27295bda3e0b..b1c6231defad 100644
--- a/drivers/tee/tee_shm.c
+++ b/drivers/tee/tee_shm.c
@@ -11,6 +11,7 @@
#include <linux/tee_drv.h>
#include <linux/uaccess.h>
#include <linux/uio.h>
+#include <linux/highmem.h>
#include "tee_private.h"
static void shm_put_kernel_pages(struct page **pages, size_t page_count)
@@ -24,38 +25,20 @@ static void shm_put_kernel_pages(struct page **pages, size_t page_count)
static int shm_get_kernel_pages(unsigned long start, size_t page_count,
struct page **pages)
{
+ struct page *page;
size_t n;
- int rc;
-
- if (is_vmalloc_addr((void *)start)) {
- struct page *page;
-
- for (n = 0; n < page_count; n++) {
- page = vmalloc_to_page((void *)(start + PAGE_SIZE * n));
- if (!page)
- return -ENOMEM;
- get_page(page);
- pages[n] = page;
- }
- rc = page_count;
- } else {
- struct kvec *kiov;
-
- kiov = kcalloc(page_count, sizeof(*kiov), GFP_KERNEL);
- if (!kiov)
- return -ENOMEM;
-
- for (n = 0; n < page_count; n++) {
- kiov[n].iov_base = (void *)(start + n * PAGE_SIZE);
- kiov[n].iov_len = PAGE_SIZE;
- }
+ if (WARN_ON_ONCE(is_vmalloc_addr((void *)start) ||
+ is_kmap_addr((void *)start)))
+ return -EINVAL;
- rc = get_kernel_pages(kiov, page_count, 0, pages);
- kfree(kiov);
+ page = virt_to_page(start);
+ for (n = 0; n < page_count; n++) {
+ pages[n] = page + n;
+ get_page(pages[n]);
}
- return rc;
+ return page_count;
}
static void release_registered_pages(struct tee_shm *shm)
diff --git a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c
index 62c0aa5d0783..0a4eaa307156 100644
--- a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c
+++ b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c
@@ -44,11 +44,13 @@ static int int340x_thermal_get_trip_temp(struct thermal_zone_device *zone,
int trip, int *temp)
{
struct int34x_thermal_zone *d = zone->devdata;
- int i;
+ int i, ret = 0;
if (d->override_ops && d->override_ops->get_trip_temp)
return d->override_ops->get_trip_temp(zone, trip, temp);
+ mutex_lock(&d->trip_mutex);
+
if (trip < d->aux_trip_nr)
*temp = d->aux_trips[trip];
else if (trip == d->crt_trip_id)
@@ -66,10 +68,12 @@ static int int340x_thermal_get_trip_temp(struct thermal_zone_device *zone,
}
}
if (i == INT340X_THERMAL_MAX_ACT_TRIP_COUNT)
- return -EINVAL;
+ ret = -EINVAL;
}
- return 0;
+ mutex_unlock(&d->trip_mutex);
+
+ return ret;
}
static int int340x_thermal_get_trip_type(struct thermal_zone_device *zone,
@@ -77,11 +81,13 @@ static int int340x_thermal_get_trip_type(struct thermal_zone_device *zone,
enum thermal_trip_type *type)
{
struct int34x_thermal_zone *d = zone->devdata;
- int i;
+ int i, ret = 0;
if (d->override_ops && d->override_ops->get_trip_type)
return d->override_ops->get_trip_type(zone, trip, type);
+ mutex_lock(&d->trip_mutex);
+
if (trip < d->aux_trip_nr)
*type = THERMAL_TRIP_PASSIVE;
else if (trip == d->crt_trip_id)
@@ -99,10 +105,12 @@ static int int340x_thermal_get_trip_type(struct thermal_zone_device *zone,
}
}
if (i == INT340X_THERMAL_MAX_ACT_TRIP_COUNT)
- return -EINVAL;
+ ret = -EINVAL;
}
- return 0;
+ mutex_unlock(&d->trip_mutex);
+
+ return ret;
}
static int int340x_thermal_set_trip_temp(struct thermal_zone_device *zone,
@@ -180,6 +188,8 @@ int int340x_thermal_read_trips(struct int34x_thermal_zone *int34x_zone)
int trip_cnt = int34x_zone->aux_trip_nr;
int i;
+ mutex_lock(&int34x_zone->trip_mutex);
+
int34x_zone->crt_trip_id = -1;
if (!int340x_thermal_get_trip_config(int34x_zone->adev->handle, "_CRT",
&int34x_zone->crt_temp))
@@ -207,6 +217,8 @@ int int340x_thermal_read_trips(struct int34x_thermal_zone *int34x_zone)
int34x_zone->act_trips[i].valid = true;
}
+ mutex_unlock(&int34x_zone->trip_mutex);
+
return trip_cnt;
}
EXPORT_SYMBOL_GPL(int340x_thermal_read_trips);
@@ -230,6 +242,8 @@ struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev,
if (!int34x_thermal_zone)
return ERR_PTR(-ENOMEM);
+ mutex_init(&int34x_thermal_zone->trip_mutex);
+
int34x_thermal_zone->adev = adev;
int34x_thermal_zone->override_ops = override_ops;
@@ -281,6 +295,7 @@ err_thermal_zone:
acpi_lpat_free_conversion_table(int34x_thermal_zone->lpat_table);
kfree(int34x_thermal_zone->aux_trips);
err_trip_alloc:
+ mutex_destroy(&int34x_thermal_zone->trip_mutex);
kfree(int34x_thermal_zone);
return ERR_PTR(ret);
}
@@ -292,6 +307,7 @@ void int340x_thermal_zone_remove(struct int34x_thermal_zone
thermal_zone_device_unregister(int34x_thermal_zone->zone);
acpi_lpat_free_conversion_table(int34x_thermal_zone->lpat_table);
kfree(int34x_thermal_zone->aux_trips);
+ mutex_destroy(&int34x_thermal_zone->trip_mutex);
kfree(int34x_thermal_zone);
}
EXPORT_SYMBOL_GPL(int340x_thermal_zone_remove);
diff --git a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.h b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.h
index 3b4971df1b33..8f9872afd0d3 100644
--- a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.h
+++ b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.h
@@ -32,6 +32,7 @@ struct int34x_thermal_zone {
struct thermal_zone_device_ops *override_ops;
void *priv_data;
struct acpi_lpat_conversion_table *lpat_table;
+ struct mutex trip_mutex;
};
struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *,
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c
index 8c42e7662033..92ed1213fe37 100644
--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c
@@ -172,6 +172,7 @@ static const struct attribute_group fivr_attribute_group = {
RFIM_SHOW(rfi_restriction_run_busy, 1)
RFIM_SHOW(rfi_restriction_err_code, 1)
RFIM_SHOW(rfi_restriction_data_rate, 1)
+RFIM_SHOW(rfi_restriction_data_rate_base, 1)
RFIM_SHOW(ddr_data_rate_point_0, 1)
RFIM_SHOW(ddr_data_rate_point_1, 1)
RFIM_SHOW(ddr_data_rate_point_2, 1)
@@ -181,11 +182,13 @@ RFIM_SHOW(rfi_disable, 1)
RFIM_STORE(rfi_restriction_run_busy, 1)
RFIM_STORE(rfi_restriction_err_code, 1)
RFIM_STORE(rfi_restriction_data_rate, 1)
+RFIM_STORE(rfi_restriction_data_rate_base, 1)
RFIM_STORE(rfi_disable, 1)
static DEVICE_ATTR_RW(rfi_restriction_run_busy);
static DEVICE_ATTR_RW(rfi_restriction_err_code);
static DEVICE_ATTR_RW(rfi_restriction_data_rate);
+static DEVICE_ATTR_RW(rfi_restriction_data_rate_base);
static DEVICE_ATTR_RO(ddr_data_rate_point_0);
static DEVICE_ATTR_RO(ddr_data_rate_point_1);
static DEVICE_ATTR_RO(ddr_data_rate_point_2);
@@ -248,6 +251,7 @@ static struct attribute *dvfs_attrs[] = {
&dev_attr_rfi_restriction_run_busy.attr,
&dev_attr_rfi_restriction_err_code.attr,
&dev_attr_rfi_restriction_data_rate.attr,
+ &dev_attr_rfi_restriction_data_rate_base.attr,
&dev_attr_ddr_data_rate_point_0.attr,
&dev_attr_ddr_data_rate_point_1.attr,
&dev_attr_ddr_data_rate_point_2.attr,
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index f17ab2316dbd..77bd47d976a2 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -909,15 +909,20 @@ __thermal_cooling_device_register(struct device_node *np,
cdev->devdata = devdata;
ret = cdev->ops->get_max_state(cdev, &cdev->max_state);
- if (ret)
- goto out_kfree_type;
+ if (ret) {
+ kfree(cdev->type);
+ goto out_ida_remove;
+ }
thermal_cooling_device_setup_sysfs(cdev);
+
ret = dev_set_name(&cdev->device, "cooling_device%d", cdev->id);
if (ret) {
+ kfree(cdev->type);
thermal_cooling_device_destroy_sysfs(cdev);
- goto out_kfree_type;
+ goto out_ida_remove;
}
+
ret = device_register(&cdev->device);
if (ret)
goto out_kfree_type;
@@ -943,6 +948,8 @@ out_kfree_type:
thermal_cooling_device_destroy_sysfs(cdev);
kfree(cdev->type);
put_device(&cdev->device);
+
+ /* thermal_release() takes care of the rest */
cdev = NULL;
out_ida_remove:
ida_free(&thermal_cdev_ida, id);
diff --git a/drivers/thunderbolt/retimer.c b/drivers/thunderbolt/retimer.c
index 81252e31014a..56008eb91e2e 100644
--- a/drivers/thunderbolt/retimer.c
+++ b/drivers/thunderbolt/retimer.c
@@ -427,13 +427,6 @@ int tb_retimer_scan(struct tb_port *port, bool add)
{
u32 status[TB_MAX_RETIMER_INDEX + 1] = {};
int ret, i, last_idx = 0;
- struct usb4_port *usb4;
-
- usb4 = port->usb4;
- if (!usb4)
- return 0;
-
- pm_runtime_get_sync(&usb4->dev);
/*
* Send broadcast RT to make sure retimer indices facing this
@@ -441,7 +434,7 @@ int tb_retimer_scan(struct tb_port *port, bool add)
*/
ret = usb4_port_enumerate_retimers(port);
if (ret)
- goto out;
+ return ret;
/*
* Enable sideband channel for each retimer. We can do this
@@ -471,12 +464,11 @@ int tb_retimer_scan(struct tb_port *port, bool add)
break;
}
- if (!last_idx) {
- ret = 0;
- goto out;
- }
+ if (!last_idx)
+ return 0;
/* Add on-board retimers if they do not exist already */
+ ret = 0;
for (i = 1; i <= last_idx; i++) {
struct tb_retimer *rt;
@@ -490,10 +482,6 @@ int tb_retimer_scan(struct tb_port *port, bool add)
}
}
-out:
- pm_runtime_mark_last_busy(&usb4->dev);
- pm_runtime_put_autosuspend(&usb4->dev);
-
return ret;
}
diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c
index 462845804427..3f1ab30c4fb1 100644
--- a/drivers/thunderbolt/tb.c
+++ b/drivers/thunderbolt/tb.c
@@ -628,11 +628,15 @@ static void tb_scan_port(struct tb_port *port)
* Downstream switch is reachable through two ports.
* Only scan on the primary port (link_nr == 0).
*/
+
+ if (port->usb4)
+ pm_runtime_get_sync(&port->usb4->dev);
+
if (tb_wait_for_port(port, false) <= 0)
- return;
+ goto out_rpm_put;
if (port->remote) {
tb_port_dbg(port, "port already has a remote\n");
- return;
+ goto out_rpm_put;
}
tb_retimer_scan(port, true);
@@ -647,12 +651,12 @@ static void tb_scan_port(struct tb_port *port)
*/
if (PTR_ERR(sw) == -EIO || PTR_ERR(sw) == -EADDRNOTAVAIL)
tb_scan_xdomain(port);
- return;
+ goto out_rpm_put;
}
if (tb_switch_configure(sw)) {
tb_switch_put(sw);
- return;
+ goto out_rpm_put;
}
/*
@@ -681,7 +685,7 @@ static void tb_scan_port(struct tb_port *port)
if (tb_switch_add(sw)) {
tb_switch_put(sw);
- return;
+ goto out_rpm_put;
}
/* Link the switches using both links if available */
@@ -733,6 +737,12 @@ static void tb_scan_port(struct tb_port *port)
tb_add_dp_resources(sw);
tb_scan_switch(sw);
+
+out_rpm_put:
+ if (port->usb4) {
+ pm_runtime_mark_last_busy(&port->usb4->dev);
+ pm_runtime_put_autosuspend(&port->usb4->dev);
+ }
}
static void tb_deactivate_and_free_tunnel(struct tb_tunnel *tunnel)
diff --git a/drivers/thunderbolt/tunnel.c b/drivers/thunderbolt/tunnel.c
index 2c3cf7fc3357..1fc3c29b24f8 100644
--- a/drivers/thunderbolt/tunnel.c
+++ b/drivers/thunderbolt/tunnel.c
@@ -1275,7 +1275,7 @@ static void tb_usb3_reclaim_available_bandwidth(struct tb_tunnel *tunnel,
return;
} else if (!ret) {
/* Use maximum link rate if the link valid is not set */
- ret = usb4_usb3_port_max_link_rate(tunnel->src_port);
+ ret = tb_usb3_max_link_rate(tunnel->dst_port, tunnel->src_port);
if (ret < 0) {
tb_tunnel_warn(tunnel, "failed to read maximum link rate\n");
return;
diff --git a/drivers/thunderbolt/xdomain.c b/drivers/thunderbolt/xdomain.c
index cfa83486c9da..3c51e47dd86b 100644
--- a/drivers/thunderbolt/xdomain.c
+++ b/drivers/thunderbolt/xdomain.c
@@ -1419,12 +1419,19 @@ static int tb_xdomain_get_properties(struct tb_xdomain *xd)
* registered, we notify the userspace that it has changed.
*/
if (!update) {
- struct tb_port *port;
+ /*
+ * Now disable lane 1 if bonding was not enabled. Do
+ * this only if bonding was possible at the beginning
+ * (that is we are the connection manager and there are
+ * two lanes).
+ */
+ if (xd->bonding_possible) {
+ struct tb_port *port;
- /* Now disable lane 1 if bonding was not enabled */
- port = tb_port_at(xd->route, tb_xdomain_parent(xd));
- if (!port->bonded)
- tb_port_disable(port->dual_link_port);
+ port = tb_port_at(xd->route, tb_xdomain_parent(xd));
+ if (!port->bonded)
+ tb_port_disable(port->dual_link_port);
+ }
if (device_add(&xd->dev)) {
dev_err(&xd->dev, "failed to add XDomain device\n");
diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c
index 7c23112dc923..5bddb2f5e931 100644
--- a/drivers/tty/hvc/hvc_xen.c
+++ b/drivers/tty/hvc/hvc_xen.c
@@ -52,17 +52,22 @@ static DEFINE_SPINLOCK(xencons_lock);
static struct xencons_info *vtermno_to_xencons(int vtermno)
{
- struct xencons_info *entry, *n, *ret = NULL;
+ struct xencons_info *entry, *ret = NULL;
+ unsigned long flags;
- if (list_empty(&xenconsoles))
- return NULL;
+ spin_lock_irqsave(&xencons_lock, flags);
+ if (list_empty(&xenconsoles)) {
+ spin_unlock_irqrestore(&xencons_lock, flags);
+ return NULL;
+ }
- list_for_each_entry_safe(entry, n, &xenconsoles, list) {
+ list_for_each_entry(entry, &xenconsoles, list) {
if (entry->vtermno == vtermno) {
ret = entry;
break;
}
}
+ spin_unlock_irqrestore(&xencons_lock, flags);
return ret;
}
@@ -223,7 +228,7 @@ static int xen_hvm_console_init(void)
{
int r;
uint64_t v = 0;
- unsigned long gfn;
+ unsigned long gfn, flags;
struct xencons_info *info;
if (!xen_hvm_domain())
@@ -258,9 +263,9 @@ static int xen_hvm_console_init(void)
goto err;
info->vtermno = HVC_COOKIE;
- spin_lock(&xencons_lock);
+ spin_lock_irqsave(&xencons_lock, flags);
list_add_tail(&info->list, &xenconsoles);
- spin_unlock(&xencons_lock);
+ spin_unlock_irqrestore(&xencons_lock, flags);
return 0;
err:
@@ -283,6 +288,7 @@ static int xencons_info_pv_init(struct xencons_info *info, int vtermno)
static int xen_pv_console_init(void)
{
struct xencons_info *info;
+ unsigned long flags;
if (!xen_pv_domain())
return -ENODEV;
@@ -299,9 +305,9 @@ static int xen_pv_console_init(void)
/* already configured */
return 0;
}
- spin_lock(&xencons_lock);
+ spin_lock_irqsave(&xencons_lock, flags);
xencons_info_pv_init(info, HVC_COOKIE);
- spin_unlock(&xencons_lock);
+ spin_unlock_irqrestore(&xencons_lock, flags);
return 0;
}
@@ -309,6 +315,7 @@ static int xen_pv_console_init(void)
static int xen_initial_domain_console_init(void)
{
struct xencons_info *info;
+ unsigned long flags;
if (!xen_initial_domain())
return -ENODEV;
@@ -323,9 +330,9 @@ static int xen_initial_domain_console_init(void)
info->irq = bind_virq_to_irq(VIRQ_CONSOLE, 0, false);
info->vtermno = HVC_COOKIE;
- spin_lock(&xencons_lock);
+ spin_lock_irqsave(&xencons_lock, flags);
list_add_tail(&info->list, &xenconsoles);
- spin_unlock(&xencons_lock);
+ spin_unlock_irqrestore(&xencons_lock, flags);
return 0;
}
@@ -380,10 +387,12 @@ static void xencons_free(struct xencons_info *info)
static int xen_console_remove(struct xencons_info *info)
{
+ unsigned long flags;
+
xencons_disconnect_backend(info);
- spin_lock(&xencons_lock);
+ spin_lock_irqsave(&xencons_lock, flags);
list_del(&info->list);
- spin_unlock(&xencons_lock);
+ spin_unlock_irqrestore(&xencons_lock, flags);
if (info->xbdev != NULL)
xencons_free(info);
else {
@@ -394,9 +403,9 @@ static int xen_console_remove(struct xencons_info *info)
return 0;
}
-static int xencons_remove(struct xenbus_device *dev)
+static void xencons_remove(struct xenbus_device *dev)
{
- return xen_console_remove(dev_get_drvdata(&dev->dev));
+ xen_console_remove(dev_get_drvdata(&dev->dev));
}
static int xencons_connect_backend(struct xenbus_device *dev,
@@ -464,6 +473,7 @@ static int xencons_probe(struct xenbus_device *dev,
{
int ret, devid;
struct xencons_info *info;
+ unsigned long flags;
devid = dev->nodename[strlen(dev->nodename) - 1] - '0';
if (devid == 0)
@@ -482,9 +492,9 @@ static int xencons_probe(struct xenbus_device *dev,
ret = xencons_connect_backend(dev, info);
if (ret < 0)
goto error;
- spin_lock(&xencons_lock);
+ spin_lock_irqsave(&xencons_lock, flags);
list_add_tail(&info->list, &xenconsoles);
- spin_unlock(&xencons_lock);
+ spin_unlock_irqrestore(&xencons_lock, flags);
return 0;
@@ -584,10 +594,12 @@ static int __init xen_hvc_init(void)
info->hvc = hvc_alloc(HVC_COOKIE, info->irq, ops, 256);
if (IS_ERR(info->hvc)) {
+ unsigned long flags;
+
r = PTR_ERR(info->hvc);
- spin_lock(&xencons_lock);
+ spin_lock_irqsave(&xencons_lock, flags);
list_del(&info->list);
- spin_unlock(&xencons_lock);
+ spin_unlock_irqrestore(&xencons_lock, flags);
if (info->irq)
unbind_from_irqhandler(info->irq, NULL);
kfree(info);
diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c
index 37d6af2ec427..7fa66501792d 100644
--- a/drivers/tty/serial/8250/8250_dma.c
+++ b/drivers/tty/serial/8250/8250_dma.c
@@ -43,15 +43,23 @@ static void __dma_rx_complete(struct uart_8250_port *p)
struct uart_8250_dma *dma = p->dma;
struct tty_port *tty_port = &p->port.state->port;
struct dma_tx_state state;
+ enum dma_status dma_status;
int count;
- dma->rx_running = 0;
- dmaengine_tx_status(dma->rxchan, dma->rx_cookie, &state);
+ /*
+ * New DMA Rx can be started during the completion handler before it
+ * could acquire port's lock and it might still be ongoing. Don't to
+ * anything in such case.
+ */
+ dma_status = dmaengine_tx_status(dma->rxchan, dma->rx_cookie, &state);
+ if (dma_status == DMA_IN_PROGRESS)
+ return;
count = dma->rx_size - state.residue;
tty_insert_flip_string(tty_port, dma->rx_buf, count);
p->port.icount.rx += count;
+ dma->rx_running = 0;
tty_flip_buffer_push(tty_port);
}
@@ -62,9 +70,14 @@ static void dma_rx_complete(void *param)
struct uart_8250_dma *dma = p->dma;
unsigned long flags;
- __dma_rx_complete(p);
-
spin_lock_irqsave(&p->port.lock, flags);
+ if (dma->rx_running)
+ __dma_rx_complete(p);
+
+ /*
+ * Cannot be combined with the previous check because __dma_rx_complete()
+ * changes dma->rx_running.
+ */
if (!dma->rx_running && (serial_lsr_in(p) & UART_LSR_DR))
p->dma->rx_dma(p);
spin_unlock_irqrestore(&p->port.lock, flags);
diff --git a/drivers/tty/serial/8250/8250_exar.c b/drivers/tty/serial/8250/8250_exar.c
index 314a05e009df..64770c62bbec 100644
--- a/drivers/tty/serial/8250/8250_exar.c
+++ b/drivers/tty/serial/8250/8250_exar.c
@@ -43,6 +43,12 @@
#define PCI_DEVICE_ID_EXAR_XR17V4358 0x4358
#define PCI_DEVICE_ID_EXAR_XR17V8358 0x8358
+#define PCI_DEVICE_ID_SEALEVEL_710xC 0x1001
+#define PCI_DEVICE_ID_SEALEVEL_720xC 0x1002
+#define PCI_DEVICE_ID_SEALEVEL_740xC 0x1004
+#define PCI_DEVICE_ID_SEALEVEL_780xC 0x1008
+#define PCI_DEVICE_ID_SEALEVEL_716xC 0x1010
+
#define UART_EXAR_INT0 0x80
#define UART_EXAR_8XMODE 0x88 /* 8X sampling rate select */
#define UART_EXAR_SLEEP 0x8b /* Sleep mode */
@@ -638,6 +644,8 @@ exar_pci_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
nr_ports = BIT(((pcidev->device & 0x38) >> 3) - 1);
else if (board->num_ports)
nr_ports = board->num_ports;
+ else if (pcidev->vendor == PCI_VENDOR_ID_SEALEVEL)
+ nr_ports = pcidev->device & 0xff;
else
nr_ports = pcidev->device & 0x0f;
@@ -864,6 +872,12 @@ static const struct pci_device_id exar_pci_tbl[] = {
EXAR_DEVICE(COMMTECH, 4224PCI335, pbn_fastcom335_4),
EXAR_DEVICE(COMMTECH, 2324PCI335, pbn_fastcom335_4),
EXAR_DEVICE(COMMTECH, 2328PCI335, pbn_fastcom335_8),
+
+ EXAR_DEVICE(SEALEVEL, 710xC, pbn_exar_XR17V35x),
+ EXAR_DEVICE(SEALEVEL, 720xC, pbn_exar_XR17V35x),
+ EXAR_DEVICE(SEALEVEL, 740xC, pbn_exar_XR17V35x),
+ EXAR_DEVICE(SEALEVEL, 780xC, pbn_exar_XR17V35x),
+ EXAR_DEVICE(SEALEVEL, 716xC, pbn_exar_XR17V35x),
{ 0, }
};
MODULE_DEVICE_TABLE(pci, exar_pci_tbl);
diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
index d75c39f4622b..d8c2f3455eeb 100644
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -1466,6 +1466,10 @@ static bool pl011_tx_chars(struct uart_amba_port *uap, bool from_irq)
struct circ_buf *xmit = &uap->port.state->xmit;
int count = uap->fifosize >> 1;
+ if ((uap->port.rs485.flags & SER_RS485_ENABLED) &&
+ !uap->rs485_tx_started)
+ pl011_rs485_tx_start(uap);
+
if (uap->port.x_char) {
if (!pl011_tx_char(uap, uap->port.x_char, from_irq))
return true;
@@ -1477,10 +1481,6 @@ static bool pl011_tx_chars(struct uart_amba_port *uap, bool from_irq)
return false;
}
- if ((uap->port.rs485.flags & SER_RS485_ENABLED) &&
- !uap->rs485_tx_started)
- pl011_rs485_tx_start(uap);
-
/* If we are using DMA mode, try to send some characters. */
if (pl011_dma_tx_irq(uap))
return true;
diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
index f1c06e12efa0..9cd7479b03c0 100644
--- a/drivers/tty/serial/atmel_serial.c
+++ b/drivers/tty/serial/atmel_serial.c
@@ -2657,13 +2657,7 @@ static void __init atmel_console_get_options(struct uart_port *port, int *baud,
else if (mr == ATMEL_US_PAR_ODD)
*parity = 'o';
- /*
- * The serial core only rounds down when matching this to a
- * supported baud rate. Make sure we don't end up slightly
- * lower than one of those, as it would make us fall through
- * to a much lower baud rate than we really want.
- */
- *baud = port->uartclk / (16 * (quot - 1));
+ *baud = port->uartclk / (16 * quot);
}
static int __init atmel_console_setup(struct console *co, char *options)
diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c
index a3ed9b34e2ab..7ce7bb164005 100644
--- a/drivers/tty/serial/kgdboc.c
+++ b/drivers/tty/serial/kgdboc.c
@@ -171,6 +171,7 @@ static int configure_kgdboc(void)
int err = -ENODEV;
char *cptr = config;
struct console *cons;
+ int cookie;
if (!strlen(config) || isspace(config[0])) {
err = 0;
@@ -189,20 +190,9 @@ static int configure_kgdboc(void)
if (kgdboc_register_kbd(&cptr))
goto do_register;
- /*
- * tty_find_polling_driver() can call uart_set_options()
- * (via poll_init) to configure the uart. Take the console_list_lock
- * in order to synchronize against register_console(), which can also
- * configure the uart via uart_set_options(). This also allows safe
- * traversal of the console list.
- */
- console_list_lock();
-
p = tty_find_polling_driver(cptr, &tty_line);
- if (!p) {
- console_list_unlock();
+ if (!p)
goto noconfig;
- }
/*
* Take console_lock to serialize device() callback with
@@ -211,7 +201,8 @@ static int configure_kgdboc(void)
*/
console_lock();
- for_each_console(cons) {
+ cookie = console_srcu_read_lock();
+ for_each_console_srcu(cons) {
int idx;
if (cons->device && cons->device(cons, &idx) == p &&
idx == tty_line) {
@@ -219,11 +210,10 @@ static int configure_kgdboc(void)
break;
}
}
+ console_srcu_read_unlock(cookie);
console_unlock();
- console_list_unlock();
-
kgdb_tty_driver = p;
kgdb_tty_line = tty_line;
diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c
index 3d54a43768cd..9576ba8bbc40 100644
--- a/drivers/tty/serial/pch_uart.c
+++ b/drivers/tty/serial/pch_uart.c
@@ -749,7 +749,7 @@ static void pch_dma_tx_complete(void *arg)
uart_xmit_advance(port, sg_dma_len(sg));
async_tx_ack(priv->desc_tx);
- dma_unmap_sg(port->dev, sg, priv->orig_nent, DMA_TO_DEVICE);
+ dma_unmap_sg(port->dev, priv->sg_tx_p, priv->orig_nent, DMA_TO_DEVICE);
priv->tx_dma_use = 0;
priv->nent = 0;
priv->orig_nent = 0;
diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index b487823f0e61..57f04f8bf504 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -864,9 +864,10 @@ out_unlock:
return IRQ_HANDLED;
}
-static void get_tx_fifo_size(struct qcom_geni_serial_port *port)
+static int setup_fifos(struct qcom_geni_serial_port *port)
{
struct uart_port *uport;
+ u32 old_rx_fifo_depth = port->rx_fifo_depth;
uport = &port->uport;
port->tx_fifo_depth = geni_se_get_tx_fifo_depth(&port->se);
@@ -874,6 +875,16 @@ static void get_tx_fifo_size(struct qcom_geni_serial_port *port)
port->rx_fifo_depth = geni_se_get_rx_fifo_depth(&port->se);
uport->fifosize =
(port->tx_fifo_depth * port->tx_fifo_width) / BITS_PER_BYTE;
+
+ if (port->rx_fifo && (old_rx_fifo_depth != port->rx_fifo_depth) && port->rx_fifo_depth) {
+ port->rx_fifo = devm_krealloc(uport->dev, port->rx_fifo,
+ port->rx_fifo_depth * sizeof(u32),
+ GFP_KERNEL);
+ if (!port->rx_fifo)
+ return -ENOMEM;
+ }
+
+ return 0;
}
@@ -888,6 +899,7 @@ static int qcom_geni_serial_port_setup(struct uart_port *uport)
u32 rxstale = DEFAULT_BITS_PER_CHAR * STALE_TIMEOUT;
u32 proto;
u32 pin_swap;
+ int ret;
proto = geni_se_read_proto(&port->se);
if (proto != GENI_SE_UART) {
@@ -897,7 +909,9 @@ static int qcom_geni_serial_port_setup(struct uart_port *uport)
qcom_geni_serial_stop_rx(uport);
- get_tx_fifo_size(port);
+ ret = setup_fifos(port);
+ if (ret)
+ return ret;
writel(rxstale, uport->membase + SE_UART_RX_STALE_CNT);
@@ -1516,7 +1530,7 @@ static int qcom_geni_serial_remove(struct platform_device *pdev)
return 0;
}
-static int __maybe_unused qcom_geni_serial_sys_suspend(struct device *dev)
+static int qcom_geni_serial_sys_suspend(struct device *dev)
{
struct qcom_geni_serial_port *port = dev_get_drvdata(dev);
struct uart_port *uport = &port->uport;
@@ -1533,7 +1547,7 @@ static int __maybe_unused qcom_geni_serial_sys_suspend(struct device *dev)
return uart_suspend_port(private_data->drv, uport);
}
-static int __maybe_unused qcom_geni_serial_sys_resume(struct device *dev)
+static int qcom_geni_serial_sys_resume(struct device *dev)
{
int ret;
struct qcom_geni_serial_port *port = dev_get_drvdata(dev);
@@ -1581,10 +1595,12 @@ static int qcom_geni_serial_sys_hib_resume(struct device *dev)
}
static const struct dev_pm_ops qcom_geni_serial_pm_ops = {
- SET_SYSTEM_SLEEP_PM_OPS(qcom_geni_serial_sys_suspend,
- qcom_geni_serial_sys_resume)
- .restore = qcom_geni_serial_sys_hib_resume,
- .thaw = qcom_geni_serial_sys_hib_resume,
+ .suspend = pm_sleep_ptr(qcom_geni_serial_sys_suspend),
+ .resume = pm_sleep_ptr(qcom_geni_serial_sys_resume),
+ .freeze = pm_sleep_ptr(qcom_geni_serial_sys_suspend),
+ .poweroff = pm_sleep_ptr(qcom_geni_serial_sys_suspend),
+ .restore = pm_sleep_ptr(qcom_geni_serial_sys_hib_resume),
+ .thaw = pm_sleep_ptr(qcom_geni_serial_sys_hib_resume),
};
static const struct of_device_id qcom_geni_serial_match_table[] = {
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index b9fbbee598b8..ec874f3a567c 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -2212,6 +2212,9 @@ EXPORT_SYMBOL_GPL(uart_parse_options);
* @parity: parity character - 'n' (none), 'o' (odd), 'e' (even)
* @bits: number of data bits
* @flow: flow control character - 'r' (rts)
+ *
+ * Locking: Caller must hold console_list_lock in order to serialize
+ * early initialization of the serial-console lock.
*/
int
uart_set_options(struct uart_port *port, struct console *co,
@@ -2619,7 +2622,9 @@ static int uart_poll_init(struct tty_driver *driver, int line, char *options)
if (!ret && options) {
uart_parse_options(options, &baud, &parity, &bits, &flow);
+ console_list_lock();
ret = uart_set_options(port, NULL, baud, parity, bits, flow);
+ console_list_unlock();
}
out:
mutex_unlock(&tport->mutex);
diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c
index a1490033aa16..409e91d6829a 100644
--- a/drivers/tty/serial/stm32-usart.c
+++ b/drivers/tty/serial/stm32-usart.c
@@ -797,25 +797,11 @@ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr)
spin_unlock(&port->lock);
}
- if (stm32_usart_rx_dma_enabled(port))
- return IRQ_WAKE_THREAD;
- else
- return IRQ_HANDLED;
-}
-
-static irqreturn_t stm32_usart_threaded_interrupt(int irq, void *ptr)
-{
- struct uart_port *port = ptr;
- struct tty_port *tport = &port->state->port;
- struct stm32_port *stm32_port = to_stm32_port(port);
- unsigned int size;
- unsigned long flags;
-
/* Receiver timeout irq for DMA RX */
- if (!stm32_port->throttled) {
- spin_lock_irqsave(&port->lock, flags);
+ if (stm32_usart_rx_dma_enabled(port) && !stm32_port->throttled) {
+ spin_lock(&port->lock);
size = stm32_usart_receive_chars(port, false);
- uart_unlock_and_check_sysrq_irqrestore(port, flags);
+ uart_unlock_and_check_sysrq(port);
if (size)
tty_flip_buffer_push(tport);
}
@@ -1015,10 +1001,8 @@ static int stm32_usart_startup(struct uart_port *port)
u32 val;
int ret;
- ret = request_threaded_irq(port->irq, stm32_usart_interrupt,
- stm32_usart_threaded_interrupt,
- IRQF_ONESHOT | IRQF_NO_SUSPEND,
- name, port);
+ ret = request_irq(port->irq, stm32_usart_interrupt,
+ IRQF_NO_SUSPEND, name, port);
if (ret)
return ret;
@@ -1601,13 +1585,6 @@ static int stm32_usart_of_dma_rx_probe(struct stm32_port *stm32port,
struct dma_slave_config config;
int ret;
- /*
- * Using DMA and threaded handler for the console could lead to
- * deadlocks.
- */
- if (uart_console(port))
- return -ENODEV;
-
stm32port->rx_buf = dma_alloc_coherent(dev, RX_BUF_L,
&stm32port->rx_dma_buf,
GFP_KERNEL);
diff --git a/drivers/tty/vt/vc_screen.c b/drivers/tty/vt/vc_screen.c
index 1850bacdb5b0..f566eb1839dc 100644
--- a/drivers/tty/vt/vc_screen.c
+++ b/drivers/tty/vt/vc_screen.c
@@ -386,10 +386,6 @@ vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
uni_mode = use_unicode(inode);
attr = use_attributes(inode);
- ret = -ENXIO;
- vc = vcs_vc(inode, &viewed);
- if (!vc)
- goto unlock_out;
ret = -EINVAL;
if (pos < 0)
@@ -407,6 +403,11 @@ vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
unsigned int this_round, skip = 0;
int size;
+ ret = -ENXIO;
+ vc = vcs_vc(inode, &viewed);
+ if (!vc)
+ goto unlock_out;
+
/* Check whether we are above size each round,
* as copy_to_user at the end of this loop
* could sleep.
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index e18c9f4463ec..3a1c4d31e010 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -1234,12 +1234,14 @@ static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba)
* clock scaling is in progress
*/
ufshcd_scsi_block_requests(hba);
+ mutex_lock(&hba->wb_mutex);
down_write(&hba->clk_scaling_lock);
if (!hba->clk_scaling.is_allowed ||
ufshcd_wait_for_doorbell_clr(hba, DOORBELL_CLR_TOUT_US)) {
ret = -EBUSY;
up_write(&hba->clk_scaling_lock);
+ mutex_unlock(&hba->wb_mutex);
ufshcd_scsi_unblock_requests(hba);
goto out;
}
@@ -1251,12 +1253,16 @@ out:
return ret;
}
-static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, bool writelock)
+static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, int err, bool scale_up)
{
- if (writelock)
- up_write(&hba->clk_scaling_lock);
- else
- up_read(&hba->clk_scaling_lock);
+ up_write(&hba->clk_scaling_lock);
+
+ /* Enable Write Booster if we have scaled up else disable it */
+ if (ufshcd_enable_wb_if_scaling_up(hba) && !err)
+ ufshcd_wb_toggle(hba, scale_up);
+
+ mutex_unlock(&hba->wb_mutex);
+
ufshcd_scsi_unblock_requests(hba);
ufshcd_release(hba);
}
@@ -1273,7 +1279,6 @@ static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, bool writelock)
static int ufshcd_devfreq_scale(struct ufs_hba *hba, bool scale_up)
{
int ret = 0;
- bool is_writelock = true;
ret = ufshcd_clock_scaling_prepare(hba);
if (ret)
@@ -1302,15 +1307,8 @@ static int ufshcd_devfreq_scale(struct ufs_hba *hba, bool scale_up)
}
}
- /* Enable Write Booster if we have scaled up else disable it */
- if (ufshcd_enable_wb_if_scaling_up(hba)) {
- downgrade_write(&hba->clk_scaling_lock);
- is_writelock = false;
- ufshcd_wb_toggle(hba, scale_up);
- }
-
out_unprepare:
- ufshcd_clock_scaling_unprepare(hba, is_writelock);
+ ufshcd_clock_scaling_unprepare(hba, ret, scale_up);
return ret;
}
@@ -6056,11 +6054,21 @@ void ufshcd_schedule_eh_work(struct ufs_hba *hba)
}
}
+static void ufshcd_force_error_recovery(struct ufs_hba *hba)
+{
+ spin_lock_irq(hba->host->host_lock);
+ hba->force_reset = true;
+ ufshcd_schedule_eh_work(hba);
+ spin_unlock_irq(hba->host->host_lock);
+}
+
static void ufshcd_clk_scaling_allow(struct ufs_hba *hba, bool allow)
{
+ mutex_lock(&hba->wb_mutex);
down_write(&hba->clk_scaling_lock);
hba->clk_scaling.is_allowed = allow;
up_write(&hba->clk_scaling_lock);
+ mutex_unlock(&hba->wb_mutex);
}
static void ufshcd_clk_scaling_suspend(struct ufs_hba *hba, bool suspend)
@@ -9083,6 +9091,15 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
if (!hba->dev_info.b_rpm_dev_flush_capable) {
ret = ufshcd_set_dev_pwr_mode(hba, req_dev_pwr_mode);
+ if (ret && pm_op != UFS_SHUTDOWN_PM) {
+ /*
+ * If return err in suspend flow, IO will hang.
+ * Trigger error handler and break suspend for
+ * error recovery.
+ */
+ ufshcd_force_error_recovery(hba);
+ ret = -EBUSY;
+ }
if (ret)
goto enable_scaling;
}
@@ -9094,6 +9111,15 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
*/
check_for_bkops = !ufshcd_is_ufs_dev_deepsleep(hba);
ret = ufshcd_link_state_transition(hba, req_link_state, check_for_bkops);
+ if (ret && pm_op != UFS_SHUTDOWN_PM) {
+ /*
+ * If return err in suspend flow, IO will hang.
+ * Trigger error handler and break suspend for
+ * error recovery.
+ */
+ ufshcd_force_error_recovery(hba);
+ ret = -EBUSY;
+ }
if (ret)
goto set_dev_active;
@@ -9767,6 +9793,7 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
/* Initialize mutex for exception event control */
mutex_init(&hba->ee_ctrl_mutex);
+ mutex_init(&hba->wb_mutex);
init_rwsem(&hba->clk_scaling_lock);
ufshcd_init_clk_gating(hba);
diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c
index 5adcb349718c..ccfaebca6faa 100644
--- a/drivers/usb/cdns3/cdns3-gadget.c
+++ b/drivers/usb/cdns3/cdns3-gadget.c
@@ -2614,6 +2614,7 @@ int cdns3_gadget_ep_dequeue(struct usb_ep *ep,
u8 req_on_hw_ring = 0;
unsigned long flags;
int ret = 0;
+ int val;
if (!ep || !request || !ep->desc)
return -EINVAL;
@@ -2649,6 +2650,13 @@ found:
/* Update ring only if removed request is on pending_req_list list */
if (req_on_hw_ring && link_trb) {
+ /* Stop DMA */
+ writel(EP_CMD_DFLUSH, &priv_dev->regs->ep_cmd);
+
+ /* wait for DFLUSH cleared */
+ readl_poll_timeout_atomic(&priv_dev->regs->ep_cmd, val,
+ !(val & EP_CMD_DFLUSH), 1, 1000);
+
link_trb->buffer = cpu_to_le32(TRB_BUFFER(priv_ep->trb_pool_dma +
((priv_req->end_trb + 1) * TRB_SIZE)));
link_trb->control = cpu_to_le32((le32_to_cpu(link_trb->control) & TRB_CYCLE) |
@@ -2660,6 +2668,10 @@ found:
cdns3_gadget_giveback(priv_ep, priv_req, -ECONNRESET);
+ req = cdns3_next_request(&priv_ep->pending_req_list);
+ if (req)
+ cdns3_rearm_transfer(priv_ep, 1);
+
not_found:
spin_unlock_irqrestore(&priv_dev->lock, flags);
return ret;
diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c
index 484b1cd23431..27c601296130 100644
--- a/drivers/usb/chipidea/core.c
+++ b/drivers/usb/chipidea/core.c
@@ -1294,12 +1294,12 @@ static void ci_extcon_wakeup_int(struct ci_hdrc *ci)
cable_id = &ci->platdata->id_extcon;
cable_vbus = &ci->platdata->vbus_extcon;
- if ((!IS_ERR(cable_id->edev) || !IS_ERR(ci->role_switch))
+ if ((!IS_ERR(cable_id->edev) || ci->role_switch)
&& ci->is_otg &&
(otgsc & OTGSC_IDIE) && (otgsc & OTGSC_IDIS))
ci_irq(ci);
- if ((!IS_ERR(cable_vbus->edev) || !IS_ERR(ci->role_switch))
+ if ((!IS_ERR(cable_vbus->edev) || ci->role_switch)
&& ci->is_otg &&
(otgsc & OTGSC_BSVIE) && (otgsc & OTGSC_BSVIS))
ci_irq(ci);
diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c
index 60e8174686a1..d7c8461976ce 100644
--- a/drivers/usb/common/ulpi.c
+++ b/drivers/usb/common/ulpi.c
@@ -207,7 +207,7 @@ static int ulpi_read_id(struct ulpi *ulpi)
/* Test the interface */
ret = ulpi_write(ulpi, ULPI_SCRATCH, 0xaa);
if (ret < 0)
- return ret;
+ goto err;
ret = ulpi_read(ulpi, ULPI_SCRATCH);
if (ret < 0)
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 77e73fc8d673..9eca403af2a8 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -44,6 +44,9 @@
#define USB_PRODUCT_USB5534B 0x5534
#define USB_VENDOR_CYPRESS 0x04b4
#define USB_PRODUCT_CY7C65632 0x6570
+#define USB_VENDOR_TEXAS_INSTRUMENTS 0x0451
+#define USB_PRODUCT_TUSB8041_USB3 0x8140
+#define USB_PRODUCT_TUSB8041_USB2 0x8142
#define HUB_QUIRK_CHECK_PORT_AUTOSUSPEND 0x01
#define HUB_QUIRK_DISABLE_AUTOSUSPEND 0x02
@@ -5854,6 +5857,16 @@ static const struct usb_device_id hub_id_table[] = {
.idVendor = USB_VENDOR_GENESYS_LOGIC,
.bInterfaceClass = USB_CLASS_HUB,
.driver_info = HUB_QUIRK_CHECK_PORT_AUTOSUSPEND},
+ { .match_flags = USB_DEVICE_ID_MATCH_VENDOR
+ | USB_DEVICE_ID_MATCH_PRODUCT,
+ .idVendor = USB_VENDOR_TEXAS_INSTRUMENTS,
+ .idProduct = USB_PRODUCT_TUSB8041_USB2,
+ .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND},
+ { .match_flags = USB_DEVICE_ID_MATCH_VENDOR
+ | USB_DEVICE_ID_MATCH_PRODUCT,
+ .idVendor = USB_VENDOR_TEXAS_INSTRUMENTS,
+ .idProduct = USB_PRODUCT_TUSB8041_USB3,
+ .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND},
{ .match_flags = USB_DEVICE_ID_MATCH_DEV_CLASS,
.bDeviceClass = USB_CLASS_HUB},
{ .match_flags = USB_DEVICE_ID_MATCH_INT_CLASS,
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 079e183cf3bf..934b3d997702 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -526,6 +526,9 @@ static const struct usb_device_id usb_quirk_list[] = {
/* DJI CineSSD */
{ USB_DEVICE(0x2ca3, 0x0031), .driver_info = USB_QUIRK_NO_LPM },
+ /* Alcor Link AK9563 SC Reader used in 2022 Lenovo ThinkPads */
+ { USB_DEVICE(0x2ce3, 0x9563), .driver_info = USB_QUIRK_NO_LPM },
+
/* DELL USB GEN2 */
{ USB_DEVICE(0x413c, 0xb062), .driver_info = USB_QUIRK_NO_LPM | USB_QUIRK_RESET_RESUME },
diff --git a/drivers/usb/core/usb-acpi.c b/drivers/usb/core/usb-acpi.c
index 6d93428432f1..533baa85083c 100644
--- a/drivers/usb/core/usb-acpi.c
+++ b/drivers/usb/core/usb-acpi.c
@@ -37,6 +37,71 @@ bool usb_acpi_power_manageable(struct usb_device *hdev, int index)
}
EXPORT_SYMBOL_GPL(usb_acpi_power_manageable);
+#define UUID_USB_CONTROLLER_DSM "ce2ee385-00e6-48cb-9f05-2edb927c4899"
+#define USB_DSM_DISABLE_U1_U2_FOR_PORT 5
+
+/**
+ * usb_acpi_port_lpm_incapable - check if lpm should be disabled for a port.
+ * @hdev: USB device belonging to the usb hub
+ * @index: zero based port index
+ *
+ * Some USB3 ports may not support USB3 link power management U1/U2 states
+ * due to different retimer setup. ACPI provides _DSM method which returns 0x01
+ * if U1 and U2 states should be disabled. Evaluate _DSM with:
+ * Arg0: UUID = ce2ee385-00e6-48cb-9f05-2edb927c4899
+ * Arg1: Revision ID = 0
+ * Arg2: Function Index = 5
+ * Arg3: (empty)
+ *
+ * Return 1 if USB3 port is LPM incapable, negative on error, otherwise 0
+ */
+
+int usb_acpi_port_lpm_incapable(struct usb_device *hdev, int index)
+{
+ union acpi_object *obj;
+ acpi_handle port_handle;
+ int port1 = index + 1;
+ guid_t guid;
+ int ret;
+
+ ret = guid_parse(UUID_USB_CONTROLLER_DSM, &guid);
+ if (ret)
+ return ret;
+
+ port_handle = usb_get_hub_port_acpi_handle(hdev, port1);
+ if (!port_handle) {
+ dev_dbg(&hdev->dev, "port-%d no acpi handle\n", port1);
+ return -ENODEV;
+ }
+
+ if (!acpi_check_dsm(port_handle, &guid, 0,
+ BIT(USB_DSM_DISABLE_U1_U2_FOR_PORT))) {
+ dev_dbg(&hdev->dev, "port-%d no _DSM function %d\n",
+ port1, USB_DSM_DISABLE_U1_U2_FOR_PORT);
+ return -ENODEV;
+ }
+
+ obj = acpi_evaluate_dsm(port_handle, &guid, 0,
+ USB_DSM_DISABLE_U1_U2_FOR_PORT, NULL);
+
+ if (!obj)
+ return -ENODEV;
+
+ if (obj->type != ACPI_TYPE_INTEGER) {
+ dev_dbg(&hdev->dev, "evaluate port-%d _DSM failed\n", port1);
+ ACPI_FREE(obj);
+ return -EINVAL;
+ }
+
+ if (obj->integer.value == 0x01)
+ ret = 1;
+
+ ACPI_FREE(obj);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(usb_acpi_port_lpm_incapable);
+
/**
* usb_acpi_set_power_state - control usb port's power via acpi power
* resource
diff --git a/drivers/usb/dwc3/Kconfig b/drivers/usb/dwc3/Kconfig
index b2f72b0e75c6..be954a9abbe0 100644
--- a/drivers/usb/dwc3/Kconfig
+++ b/drivers/usb/dwc3/Kconfig
@@ -3,6 +3,7 @@
config USB_DWC3
tristate "DesignWare USB3 DRD Core Support"
depends on (USB || USB_GADGET) && HAS_DMA
+ depends on (EXTCON || EXTCON=n)
select USB_XHCI_PLATFORM if USB_XHCI_HCD
select USB_ROLE_SWITCH if USB_DWC3_DUAL_ROLE
help
@@ -44,7 +45,6 @@ config USB_DWC3_GADGET
config USB_DWC3_DUAL_ROLE
bool "Dual Role mode"
depends on ((USB=y || USB=USB_DWC3) && (USB_GADGET=y || USB_GADGET=USB_DWC3))
- depends on (EXTCON=y || EXTCON=USB_DWC3)
help
This is the default mode of working of DWC3 controller where
both host and gadget features are enabled.
diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c
index b0a0351d2d8b..959fc925ca7c 100644
--- a/drivers/usb/dwc3/dwc3-qcom.c
+++ b/drivers/usb/dwc3/dwc3-qcom.c
@@ -901,7 +901,7 @@ static int dwc3_qcom_probe(struct platform_device *pdev)
qcom->mode = usb_get_dr_mode(&qcom->dwc3->dev);
/* enable vbus override for device mode */
- if (qcom->mode == USB_DR_MODE_PERIPHERAL)
+ if (qcom->mode != USB_DR_MODE_HOST)
dwc3_qcom_vbus_override_enable(qcom, true);
/* register extcon to override sw_vbus on Vbus change later */
diff --git a/drivers/usb/dwc3/dwc3-xilinx.c b/drivers/usb/dwc3/dwc3-xilinx.c
index 8607d4c23283..0745e9f11b2e 100644
--- a/drivers/usb/dwc3/dwc3-xilinx.c
+++ b/drivers/usb/dwc3/dwc3-xilinx.c
@@ -13,6 +13,7 @@
#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/dma-mapping.h>
+#include <linux/gpio/consumer.h>
#include <linux/of_gpio.h>
#include <linux/of_platform.h>
#include <linux/pm_runtime.h>
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 789976567f9f..89dcfac01235 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1727,6 +1727,7 @@ static int __dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, bool int
else if (!ret)
dep->flags |= DWC3_EP_END_TRANSFER_PENDING;
+ dep->flags &= ~DWC3_EP_DELAY_STOP;
return ret;
}
@@ -3732,8 +3733,10 @@ void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force,
if (dep->number <= 1 && dwc->ep0state != EP0_DATA_PHASE)
return;
+ if (interrupt && (dep->flags & DWC3_EP_DELAY_STOP))
+ return;
+
if (!(dep->flags & DWC3_EP_TRANSFER_STARTED) ||
- (dep->flags & DWC3_EP_DELAY_STOP) ||
(dep->flags & DWC3_EP_END_TRANSFER_PENDING))
return;
diff --git a/drivers/usb/fotg210/fotg210-core.c b/drivers/usb/fotg210/fotg210-core.c
index 8a54edf921ac..ee740a6da463 100644
--- a/drivers/usb/fotg210/fotg210-core.c
+++ b/drivers/usb/fotg210/fotg210-core.c
@@ -144,10 +144,7 @@ static struct platform_driver fotg210_driver = {
static int __init fotg210_init(void)
{
- if (usb_disabled())
- return -ENODEV;
-
- if (IS_ENABLED(CONFIG_USB_FOTG210_HCD))
+ if (IS_ENABLED(CONFIG_USB_FOTG210_HCD) && !usb_disabled())
fotg210_hcd_init();
return platform_driver_register(&fotg210_driver);
}
diff --git a/drivers/usb/fotg210/fotg210-udc.c b/drivers/usb/fotg210/fotg210-udc.c
index 66e1b7ee3346..eb076746f032 100644
--- a/drivers/usb/fotg210/fotg210-udc.c
+++ b/drivers/usb/fotg210/fotg210-udc.c
@@ -1014,7 +1014,6 @@ static int fotg210_udc_start(struct usb_gadget *g,
int ret;
/* hook up the driver */
- driver->driver.bus = NULL;
fotg210->driver = driver;
if (!IS_ERR_OR_NULL(fotg210->phy)) {
@@ -1201,6 +1200,8 @@ int fotg210_udc_probe(struct platform_device *pdev)
dev_info(dev, "found and initialized PHY\n");
}
+ ret = -ENOMEM;
+
for (i = 0; i < FOTG210_MAX_NUM_EP; i++) {
fotg210->ep[i] = kzalloc(sizeof(struct fotg210_ep), GFP_KERNEL);
if (!fotg210->ep[i])
diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c
index 96121d1c8df4..0853536cbf2e 100644
--- a/drivers/usb/gadget/configfs.c
+++ b/drivers/usb/gadget/configfs.c
@@ -393,6 +393,7 @@ static void gadget_info_attr_release(struct config_item *item)
WARN_ON(!list_empty(&gi->string_list));
WARN_ON(!list_empty(&gi->available_func));
kfree(gi->composite.gadget_driver.function);
+ kfree(gi->composite.gadget_driver.driver.name);
kfree(gi);
}
@@ -1572,7 +1573,6 @@ static const struct usb_gadget_driver configfs_driver_template = {
.max_speed = USB_SPEED_SUPER_PLUS,
.driver = {
.owner = THIS_MODULE,
- .name = "configfs-gadget",
},
.match_existing_only = 1,
};
@@ -1623,13 +1623,21 @@ static struct config_group *gadgets_make(
gi->composite.gadget_driver = configfs_driver_template;
+ gi->composite.gadget_driver.driver.name = kasprintf(GFP_KERNEL,
+ "configfs-gadget.%s", name);
+ if (!gi->composite.gadget_driver.driver.name)
+ goto err;
+
gi->composite.gadget_driver.function = kstrdup(name, GFP_KERNEL);
gi->composite.name = gi->composite.gadget_driver.function;
if (!gi->composite.gadget_driver.function)
- goto err;
+ goto out_free_driver_name;
return &gi->group;
+
+out_free_driver_name:
+ kfree(gi->composite.gadget_driver.driver.name);
err:
kfree(gi);
return ERR_PTR(-ENOMEM);
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 73dc10a77cde..8ad354741380 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -279,6 +279,11 @@ static int __ffs_ep0_queue_wait(struct ffs_data *ffs, char *data, size_t len)
struct usb_request *req = ffs->ep0req;
int ret;
+ if (!req) {
+ spin_unlock_irq(&ffs->ev.waitq.lock);
+ return -EINVAL;
+ }
+
req->zero = len < le16_to_cpu(ffs->ev.setup.wLength);
spin_unlock_irq(&ffs->ev.waitq.lock);
@@ -1892,10 +1897,14 @@ static void functionfs_unbind(struct ffs_data *ffs)
ENTER();
if (!WARN_ON(!ffs->gadget)) {
+ /* dequeue before freeing ep0req */
+ usb_ep_dequeue(ffs->gadget->ep0, ffs->ep0req);
+ mutex_lock(&ffs->mutex);
usb_ep_free_request(ffs->gadget->ep0, ffs->ep0req);
ffs->ep0req = NULL;
ffs->gadget = NULL;
clear_bit(FFS_FL_BOUND, &ffs->flags);
+ mutex_unlock(&ffs->mutex);
ffs_data_put(ffs);
}
}
diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c
index c36bcfa0e9b4..424bb3b666db 100644
--- a/drivers/usb/gadget/function/f_ncm.c
+++ b/drivers/usb/gadget/function/f_ncm.c
@@ -83,7 +83,9 @@ static inline struct f_ncm *func_to_ncm(struct usb_function *f)
/* peak (theoretical) bulk transfer rate in bits-per-second */
static inline unsigned ncm_bitrate(struct usb_gadget *g)
{
- if (gadget_is_superspeed(g) && g->speed >= USB_SPEED_SUPER_PLUS)
+ if (!g)
+ return 0;
+ else if (gadget_is_superspeed(g) && g->speed >= USB_SPEED_SUPER_PLUS)
return 4250000000U;
else if (gadget_is_superspeed(g) && g->speed == USB_SPEED_SUPER)
return 3750000000U;
diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c
index 08726e4c68a5..0219cd79493a 100644
--- a/drivers/usb/gadget/function/f_uac2.c
+++ b/drivers/usb/gadget/function/f_uac2.c
@@ -1142,6 +1142,7 @@ afunc_bind(struct usb_configuration *cfg, struct usb_function *fn)
}
std_as_out_if0_desc.bInterfaceNumber = ret;
std_as_out_if1_desc.bInterfaceNumber = ret;
+ std_as_out_if1_desc.bNumEndpoints = 1;
uac2->as_out_intf = ret;
uac2->as_out_alt = 0;
diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c
index 8f12f3f8f6ee..e06022873df1 100644
--- a/drivers/usb/gadget/function/u_ether.c
+++ b/drivers/usb/gadget/function/u_ether.c
@@ -798,6 +798,7 @@ struct eth_dev *gether_setup_name(struct usb_gadget *g,
net->max_mtu = GETHER_MAX_MTU_SIZE;
dev->gadget = g;
+ SET_NETDEV_DEV(net, &g->dev);
SET_NETDEV_DEVTYPE(net, &gadget_type);
status = register_netdev(net);
@@ -872,6 +873,8 @@ int gether_register_netdev(struct net_device *net)
struct usb_gadget *g;
int status;
+ if (!net->dev.parent)
+ return -EINVAL;
dev = netdev_priv(net);
g = dev->gadget;
@@ -902,6 +905,7 @@ void gether_set_gadget(struct net_device *net, struct usb_gadget *g)
dev = netdev_priv(net);
dev->gadget = g;
+ SET_NETDEV_DEV(net, &g->dev);
}
EXPORT_SYMBOL_GPL(gether_set_gadget);
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index 01c3ead7d1b4..d605bc2e7e8f 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -229,6 +229,7 @@ static void put_ep (struct ep_data *data)
*/
static const char *CHIP;
+static DEFINE_MUTEX(sb_mutex); /* Serialize superblock operations */
/*----------------------------------------------------------------------*/
@@ -2010,13 +2011,20 @@ gadgetfs_fill_super (struct super_block *sb, struct fs_context *fc)
{
struct inode *inode;
struct dev_data *dev;
+ int rc;
- if (the_device)
- return -ESRCH;
+ mutex_lock(&sb_mutex);
+
+ if (the_device) {
+ rc = -ESRCH;
+ goto Done;
+ }
CHIP = usb_get_gadget_udc_name();
- if (!CHIP)
- return -ENODEV;
+ if (!CHIP) {
+ rc = -ENODEV;
+ goto Done;
+ }
/* superblock */
sb->s_blocksize = PAGE_SIZE;
@@ -2053,13 +2061,17 @@ gadgetfs_fill_super (struct super_block *sb, struct fs_context *fc)
* from binding to a controller.
*/
the_device = dev;
- return 0;
+ rc = 0;
+ goto Done;
-Enomem:
+ Enomem:
kfree(CHIP);
CHIP = NULL;
+ rc = -ENOMEM;
- return -ENOMEM;
+ Done:
+ mutex_unlock(&sb_mutex);
+ return rc;
}
/* "mount -t gadgetfs path /dev/gadget" ends up here */
@@ -2081,6 +2093,7 @@ static int gadgetfs_init_fs_context(struct fs_context *fc)
static void
gadgetfs_kill_sb (struct super_block *sb)
{
+ mutex_lock(&sb_mutex);
kill_litter_super (sb);
if (the_device) {
put_dev (the_device);
@@ -2088,6 +2101,7 @@ gadgetfs_kill_sb (struct super_block *sb)
}
kfree(CHIP);
CHIP = NULL;
+ mutex_unlock(&sb_mutex);
}
/*----------------------------------------------------------------------*/
diff --git a/drivers/usb/gadget/legacy/webcam.c b/drivers/usb/gadget/legacy/webcam.c
index 53e38f87472b..c06dd1af7a0c 100644
--- a/drivers/usb/gadget/legacy/webcam.c
+++ b/drivers/usb/gadget/legacy/webcam.c
@@ -293,6 +293,7 @@ static const struct uvc_descriptor_header * const uvc_fs_streaming_cls[] = {
(const struct uvc_descriptor_header *) &uvc_format_yuv,
(const struct uvc_descriptor_header *) &uvc_frame_yuv_360p,
(const struct uvc_descriptor_header *) &uvc_frame_yuv_720p,
+ (const struct uvc_descriptor_header *) &uvc_color_matching,
(const struct uvc_descriptor_header *) &uvc_format_mjpg,
(const struct uvc_descriptor_header *) &uvc_frame_mjpg_360p,
(const struct uvc_descriptor_header *) &uvc_frame_mjpg_720p,
@@ -305,6 +306,7 @@ static const struct uvc_descriptor_header * const uvc_hs_streaming_cls[] = {
(const struct uvc_descriptor_header *) &uvc_format_yuv,
(const struct uvc_descriptor_header *) &uvc_frame_yuv_360p,
(const struct uvc_descriptor_header *) &uvc_frame_yuv_720p,
+ (const struct uvc_descriptor_header *) &uvc_color_matching,
(const struct uvc_descriptor_header *) &uvc_format_mjpg,
(const struct uvc_descriptor_header *) &uvc_frame_mjpg_360p,
(const struct uvc_descriptor_header *) &uvc_frame_mjpg_720p,
@@ -317,6 +319,7 @@ static const struct uvc_descriptor_header * const uvc_ss_streaming_cls[] = {
(const struct uvc_descriptor_header *) &uvc_format_yuv,
(const struct uvc_descriptor_header *) &uvc_frame_yuv_360p,
(const struct uvc_descriptor_header *) &uvc_frame_yuv_720p,
+ (const struct uvc_descriptor_header *) &uvc_color_matching,
(const struct uvc_descriptor_header *) &uvc_format_mjpg,
(const struct uvc_descriptor_header *) &uvc_frame_mjpg_360p,
(const struct uvc_descriptor_header *) &uvc_frame_mjpg_720p,
diff --git a/drivers/usb/gadget/udc/bcm63xx_udc.c b/drivers/usb/gadget/udc/bcm63xx_udc.c
index 2cdb07905bde..d04d72f5816e 100644
--- a/drivers/usb/gadget/udc/bcm63xx_udc.c
+++ b/drivers/usb/gadget/udc/bcm63xx_udc.c
@@ -1830,7 +1830,6 @@ static int bcm63xx_udc_start(struct usb_gadget *gadget,
bcm63xx_select_phy_mode(udc, true);
udc->driver = driver;
- driver->driver.bus = NULL;
udc->gadget.dev.of_node = udc->dev->of_node;
spin_unlock_irqrestore(&udc->lock, flags);
diff --git a/drivers/usb/gadget/udc/fsl_qe_udc.c b/drivers/usb/gadget/udc/fsl_qe_udc.c
index bf745358e28e..3b1cc8fa30c8 100644
--- a/drivers/usb/gadget/udc/fsl_qe_udc.c
+++ b/drivers/usb/gadget/udc/fsl_qe_udc.c
@@ -2285,7 +2285,6 @@ static int fsl_qe_start(struct usb_gadget *gadget,
/* lock is needed but whether should use this lock or another */
spin_lock_irqsave(&udc->lock, flags);
- driver->driver.bus = NULL;
/* hook up the driver */
udc->driver = driver;
udc->gadget.speed = driver->max_speed;
diff --git a/drivers/usb/gadget/udc/fsl_udc_core.c b/drivers/usb/gadget/udc/fsl_udc_core.c
index 50435e804118..a67873a074b7 100644
--- a/drivers/usb/gadget/udc/fsl_udc_core.c
+++ b/drivers/usb/gadget/udc/fsl_udc_core.c
@@ -1943,7 +1943,6 @@ static int fsl_udc_start(struct usb_gadget *g,
/* lock is needed but whether should use this lock or another */
spin_lock_irqsave(&udc_controller->lock, flags);
- driver->driver.bus = NULL;
/* hook up the driver */
udc_controller->driver = driver;
spin_unlock_irqrestore(&udc_controller->lock, flags);
diff --git a/drivers/usb/gadget/udc/fusb300_udc.c b/drivers/usb/gadget/udc/fusb300_udc.c
index 9af8b415f303..5954800d652c 100644
--- a/drivers/usb/gadget/udc/fusb300_udc.c
+++ b/drivers/usb/gadget/udc/fusb300_udc.c
@@ -1311,7 +1311,6 @@ static int fusb300_udc_start(struct usb_gadget *g,
struct fusb300 *fusb300 = to_fusb300(g);
/* hook up the driver */
- driver->driver.bus = NULL;
fusb300->driver = driver;
return 0;
diff --git a/drivers/usb/gadget/udc/goku_udc.c b/drivers/usb/gadget/udc/goku_udc.c
index bdc56b24b5c9..5ffb3d5c635b 100644
--- a/drivers/usb/gadget/udc/goku_udc.c
+++ b/drivers/usb/gadget/udc/goku_udc.c
@@ -1375,7 +1375,6 @@ static int goku_udc_start(struct usb_gadget *g,
struct goku_udc *dev = to_goku_udc(g);
/* hook up the driver */
- driver->driver.bus = NULL;
dev->driver = driver;
/*
diff --git a/drivers/usb/gadget/udc/gr_udc.c b/drivers/usb/gadget/udc/gr_udc.c
index 22096f8505de..85cdc0af3bf9 100644
--- a/drivers/usb/gadget/udc/gr_udc.c
+++ b/drivers/usb/gadget/udc/gr_udc.c
@@ -1906,7 +1906,6 @@ static int gr_udc_start(struct usb_gadget *gadget,
spin_lock(&dev->lock);
/* Hook up the driver */
- driver->driver.bus = NULL;
dev->driver = driver;
/* Get ready for host detection */
diff --git a/drivers/usb/gadget/udc/m66592-udc.c b/drivers/usb/gadget/udc/m66592-udc.c
index c7e421b449f3..06e21cee431b 100644
--- a/drivers/usb/gadget/udc/m66592-udc.c
+++ b/drivers/usb/gadget/udc/m66592-udc.c
@@ -1454,7 +1454,6 @@ static int m66592_udc_start(struct usb_gadget *g,
struct m66592 *m66592 = to_m66592(g);
/* hook up the driver */
- driver->driver.bus = NULL;
m66592->driver = driver;
m66592_bset(m66592, M66592_VBSE | M66592_URST, M66592_INTENB0);
diff --git a/drivers/usb/gadget/udc/max3420_udc.c b/drivers/usb/gadget/udc/max3420_udc.c
index 3074da00c3df..ddf0ed3eb4f2 100644
--- a/drivers/usb/gadget/udc/max3420_udc.c
+++ b/drivers/usb/gadget/udc/max3420_udc.c
@@ -1108,7 +1108,6 @@ static int max3420_udc_start(struct usb_gadget *gadget,
spin_lock_irqsave(&udc->lock, flags);
/* hook up the driver */
- driver->driver.bus = NULL;
udc->driver = driver;
udc->gadget.speed = USB_SPEED_FULL;
diff --git a/drivers/usb/gadget/udc/mv_u3d_core.c b/drivers/usb/gadget/udc/mv_u3d_core.c
index 598654a3cb41..411b6179782c 100644
--- a/drivers/usb/gadget/udc/mv_u3d_core.c
+++ b/drivers/usb/gadget/udc/mv_u3d_core.c
@@ -1243,7 +1243,6 @@ static int mv_u3d_start(struct usb_gadget *g,
}
/* hook up the driver ... */
- driver->driver.bus = NULL;
u3d->driver = driver;
u3d->ep0_dir = USB_DIR_OUT;
diff --git a/drivers/usb/gadget/udc/mv_udc_core.c b/drivers/usb/gadget/udc/mv_udc_core.c
index fdb17d86cd65..b397f3a848cf 100644
--- a/drivers/usb/gadget/udc/mv_udc_core.c
+++ b/drivers/usb/gadget/udc/mv_udc_core.c
@@ -1359,7 +1359,6 @@ static int mv_udc_start(struct usb_gadget *gadget,
spin_lock_irqsave(&udc->lock, flags);
/* hook up the driver ... */
- driver->driver.bus = NULL;
udc->driver = driver;
udc->usb_state = USB_STATE_ATTACHED;
diff --git a/drivers/usb/gadget/udc/net2272.c b/drivers/usb/gadget/udc/net2272.c
index 84605a4d0715..538c1b9a2883 100644
--- a/drivers/usb/gadget/udc/net2272.c
+++ b/drivers/usb/gadget/udc/net2272.c
@@ -1451,7 +1451,6 @@ static int net2272_start(struct usb_gadget *_gadget,
dev->ep[i].irqs = 0;
/* hook up the driver ... */
dev->softconnect = 1;
- driver->driver.bus = NULL;
dev->driver = driver;
/* ... then enable host detection and ep0; and we're ready
diff --git a/drivers/usb/gadget/udc/net2280.c b/drivers/usb/gadget/udc/net2280.c
index d6a68631354a..1b929c519cd7 100644
--- a/drivers/usb/gadget/udc/net2280.c
+++ b/drivers/usb/gadget/udc/net2280.c
@@ -2423,7 +2423,6 @@ static int net2280_start(struct usb_gadget *_gadget,
dev->ep[i].irqs = 0;
/* hook up the driver ... */
- driver->driver.bus = NULL;
dev->driver = driver;
retval = device_create_file(&dev->pdev->dev, &dev_attr_function);
diff --git a/drivers/usb/gadget/udc/omap_udc.c b/drivers/usb/gadget/udc/omap_udc.c
index bea346e362b2..f660ebfa1379 100644
--- a/drivers/usb/gadget/udc/omap_udc.c
+++ b/drivers/usb/gadget/udc/omap_udc.c
@@ -2066,7 +2066,6 @@ static int omap_udc_start(struct usb_gadget *g,
udc->softconnect = 1;
/* hook up the driver */
- driver->driver.bus = NULL;
udc->driver = driver;
spin_unlock_irqrestore(&udc->lock, flags);
diff --git a/drivers/usb/gadget/udc/pch_udc.c b/drivers/usb/gadget/udc/pch_udc.c
index 9bb7a9d7a2fb..4f8617210d85 100644
--- a/drivers/usb/gadget/udc/pch_udc.c
+++ b/drivers/usb/gadget/udc/pch_udc.c
@@ -2908,7 +2908,6 @@ static int pch_udc_start(struct usb_gadget *g,
{
struct pch_udc_dev *dev = to_pch_udc(g);
- driver->driver.bus = NULL;
dev->driver = driver;
/* get ready for ep0 traffic */
diff --git a/drivers/usb/gadget/udc/snps_udc_core.c b/drivers/usb/gadget/udc/snps_udc_core.c
index 52ea4dcf6a92..2fc5d4d277bc 100644
--- a/drivers/usb/gadget/udc/snps_udc_core.c
+++ b/drivers/usb/gadget/udc/snps_udc_core.c
@@ -1933,7 +1933,6 @@ static int amd5536_udc_start(struct usb_gadget *g,
struct udc *dev = to_amd5536_udc(g);
u32 tmp;
- driver->driver.bus = NULL;
dev->driver = driver;
/* Some gadget drivers use both ep0 directions.
diff --git a/drivers/usb/host/ehci-fsl.c b/drivers/usb/host/ehci-fsl.c
index 9cea785934e5..38d06e5abfbb 100644
--- a/drivers/usb/host/ehci-fsl.c
+++ b/drivers/usb/host/ehci-fsl.c
@@ -29,7 +29,7 @@
#include "ehci-fsl.h"
#define DRIVER_DESC "Freescale EHCI Host controller driver"
-#define DRV_NAME "ehci-fsl"
+#define DRV_NAME "fsl-ehci"
static struct hc_driver __read_mostly fsl_ehci_hc_driver;
diff --git a/drivers/usb/host/xen-hcd.c b/drivers/usb/host/xen-hcd.c
index de1b09158318..46fdab940092 100644
--- a/drivers/usb/host/xen-hcd.c
+++ b/drivers/usb/host/xen-hcd.c
@@ -1530,15 +1530,13 @@ static void xenhcd_backend_changed(struct xenbus_device *dev,
}
}
-static int xenhcd_remove(struct xenbus_device *dev)
+static void xenhcd_remove(struct xenbus_device *dev)
{
struct xenhcd_info *info = dev_get_drvdata(&dev->dev);
struct usb_hcd *hcd = xenhcd_info_to_hcd(info);
xenhcd_destroy_rings(info);
usb_put_hcd(hcd);
-
- return 0;
}
static int xenhcd_probe(struct xenbus_device *dev,
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 79d679b3e076..fb988e4ea924 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -78,9 +78,12 @@ static const char hcd_name[] = "xhci_hcd";
static struct hc_driver __read_mostly xhci_pci_hc_driver;
static int xhci_pci_setup(struct usb_hcd *hcd);
+static int xhci_pci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev,
+ struct usb_tt *tt, gfp_t mem_flags);
static const struct xhci_driver_overrides xhci_pci_overrides __initconst = {
.reset = xhci_pci_setup,
+ .update_hub_device = xhci_pci_update_hub_device,
};
/* called after powerup, by probe or system-pm "wakeup" */
@@ -352,8 +355,38 @@ static void xhci_pme_acpi_rtd3_enable(struct pci_dev *dev)
NULL);
ACPI_FREE(obj);
}
+
+static void xhci_find_lpm_incapable_ports(struct usb_hcd *hcd, struct usb_device *hdev)
+{
+ struct xhci_hcd *xhci = hcd_to_xhci(hcd);
+ struct xhci_hub *rhub = &xhci->usb3_rhub;
+ int ret;
+ int i;
+
+ /* This is not the usb3 roothub we are looking for */
+ if (hcd != rhub->hcd)
+ return;
+
+ if (hdev->maxchild > rhub->num_ports) {
+ dev_err(&hdev->dev, "USB3 roothub port number mismatch\n");
+ return;
+ }
+
+ for (i = 0; i < hdev->maxchild; i++) {
+ ret = usb_acpi_port_lpm_incapable(hdev, i);
+
+ dev_dbg(&hdev->dev, "port-%d disable U1/U2 _DSM: %d\n", i + 1, ret);
+
+ if (ret >= 0) {
+ rhub->ports[i]->lpm_incapable = ret;
+ continue;
+ }
+ }
+}
+
#else
static void xhci_pme_acpi_rtd3_enable(struct pci_dev *dev) { }
+static void xhci_find_lpm_incapable_ports(struct usb_hcd *hcd, struct usb_device *hdev) { }
#endif /* CONFIG_ACPI */
/* called during probe() after chip reset completes */
@@ -386,6 +419,16 @@ static int xhci_pci_setup(struct usb_hcd *hcd)
return xhci_pci_reinit(xhci, pdev);
}
+static int xhci_pci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev,
+ struct usb_tt *tt, gfp_t mem_flags)
+{
+ /* Check if acpi claims some USB3 roothub ports are lpm incapable */
+ if (!hdev->parent)
+ xhci_find_lpm_incapable_ports(hcd, hdev);
+
+ return xhci_update_hub_device(hcd, hdev, tt, mem_flags);
+}
+
/*
* We need to register our own PCI probe function (instead of the USB core's
* function) in order to create a second roothub under xHCI.
@@ -455,6 +498,8 @@ static int xhci_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
if (xhci->quirks & XHCI_DEFAULT_PM_RUNTIME_ALLOW)
pm_runtime_allow(&dev->dev);
+ dma_set_max_seg_size(&dev->dev, UINT_MAX);
+
return 0;
put_usb3_hcd:
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index ddc30037f9ce..f5b0e1ce22af 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -1169,7 +1169,10 @@ static void xhci_kill_endpoint_urbs(struct xhci_hcd *xhci,
struct xhci_virt_ep *ep;
struct xhci_ring *ring;
- ep = &xhci->devs[slot_id]->eps[ep_index];
+ ep = xhci_get_virt_ep(xhci, slot_id, ep_index);
+ if (!ep)
+ return;
+
if ((ep->ep_state & EP_HAS_STREAMS) ||
(ep->ep_state & EP_GETTING_NO_STREAMS)) {
int stream_id;
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 79d7931c048a..2b280beb0011 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -3974,6 +3974,7 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev)
struct xhci_hcd *xhci = hcd_to_xhci(hcd);
struct xhci_virt_device *virt_dev;
struct xhci_slot_ctx *slot_ctx;
+ unsigned long flags;
int i, ret;
/*
@@ -4000,7 +4001,11 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev)
virt_dev->eps[i].ep_state &= ~EP_STOP_CMD_PENDING;
virt_dev->udev = NULL;
xhci_disable_slot(xhci, udev->slot_id);
+
+ spin_lock_irqsave(&xhci->lock, flags);
xhci_free_virt_device(xhci, udev->slot_id);
+ spin_unlock_irqrestore(&xhci->lock, flags);
+
}
int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id)
@@ -5044,6 +5049,7 @@ static int xhci_enable_usb3_lpm_timeout(struct usb_hcd *hcd,
struct usb_device *udev, enum usb3_link_state state)
{
struct xhci_hcd *xhci;
+ struct xhci_port *port;
u16 hub_encoded_timeout;
int mel;
int ret;
@@ -5060,6 +5066,13 @@ static int xhci_enable_usb3_lpm_timeout(struct usb_hcd *hcd,
if (xhci_check_tier_policy(xhci, udev, state) < 0)
return USB3_LPM_DISABLED;
+ /* If connected to root port then check port can handle lpm */
+ if (udev->parent && !udev->parent->parent) {
+ port = xhci->usb3_rhub.ports[udev->portnum - 1];
+ if (port->lpm_incapable)
+ return USB3_LPM_DISABLED;
+ }
+
hub_encoded_timeout = xhci_calculate_lpm_timeout(hcd, udev, state);
mel = calculate_max_exit_latency(udev, state, hub_encoded_timeout);
if (mel < 0) {
@@ -5119,7 +5132,7 @@ static int xhci_disable_usb3_lpm_timeout(struct usb_hcd *hcd,
/* Once a hub descriptor is fetched for a device, we need to update the xHC's
* internal data structures for the device.
*/
-static int xhci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev,
+int xhci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev,
struct usb_tt *tt, gfp_t mem_flags)
{
struct xhci_hcd *xhci = hcd_to_xhci(hcd);
@@ -5219,6 +5232,7 @@ static int xhci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev,
xhci_free_command(xhci, config_cmd);
return ret;
}
+EXPORT_SYMBOL_GPL(xhci_update_hub_device);
static int xhci_get_frame(struct usb_hcd *hcd)
{
@@ -5502,6 +5516,8 @@ void xhci_init_driver(struct hc_driver *drv,
drv->check_bandwidth = over->check_bandwidth;
if (over->reset_bandwidth)
drv->reset_bandwidth = over->reset_bandwidth;
+ if (over->update_hub_device)
+ drv->update_hub_device = over->update_hub_device;
}
}
EXPORT_SYMBOL_GPL(xhci_init_driver);
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index c9f06c5e4e9d..dcee7f3207ad 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1735,6 +1735,7 @@ struct xhci_port {
int hcd_portnum;
struct xhci_hub *rhub;
struct xhci_port_cap *port_cap;
+ unsigned int lpm_incapable:1;
};
struct xhci_hub {
@@ -1943,6 +1944,8 @@ struct xhci_driver_overrides {
struct usb_host_endpoint *ep);
int (*check_bandwidth)(struct usb_hcd *, struct usb_device *);
void (*reset_bandwidth)(struct usb_hcd *, struct usb_device *);
+ int (*update_hub_device)(struct usb_hcd *hcd, struct usb_device *hdev,
+ struct usb_tt *tt, gfp_t mem_flags);
};
#define XHCI_CFC_DELAY 10
@@ -2122,6 +2125,8 @@ int xhci_drop_endpoint(struct usb_hcd *hcd, struct usb_device *udev,
struct usb_host_endpoint *ep);
int xhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev);
void xhci_reset_bandwidth(struct usb_hcd *hcd, struct usb_device *udev);
+int xhci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev,
+ struct usb_tt *tt, gfp_t mem_flags);
int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id);
int xhci_ext_cap_init(struct xhci_hcd *xhci);
diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c
index f9427a67789c..1e3df27bab58 100644
--- a/drivers/usb/misc/iowarrior.c
+++ b/drivers/usb/misc/iowarrior.c
@@ -814,7 +814,7 @@ static int iowarrior_probe(struct usb_interface *interface,
break;
case USB_DEVICE_ID_CODEMERCS_IOW100:
- dev->report_size = 13;
+ dev->report_size = 12;
break;
}
}
diff --git a/drivers/usb/misc/onboard_usb_hub.c b/drivers/usb/misc/onboard_usb_hub.c
index 94e7966e199d..969c4c4f2ae9 100644
--- a/drivers/usb/misc/onboard_usb_hub.c
+++ b/drivers/usb/misc/onboard_usb_hub.c
@@ -27,7 +27,10 @@
#include "onboard_usb_hub.h"
+static void onboard_hub_attach_usb_driver(struct work_struct *work);
+
static struct usb_device_driver onboard_hub_usbdev_driver;
+static DECLARE_WORK(attach_usb_driver_work, onboard_hub_attach_usb_driver);
/************************** Platform driver **************************/
@@ -45,7 +48,6 @@ struct onboard_hub {
bool is_powered_on;
bool going_away;
struct list_head udev_list;
- struct work_struct attach_usb_driver_work;
struct mutex lock;
};
@@ -271,8 +273,7 @@ static int onboard_hub_probe(struct platform_device *pdev)
* This needs to be done deferred to avoid self-deadlocks on systems
* with nested onboard hubs.
*/
- INIT_WORK(&hub->attach_usb_driver_work, onboard_hub_attach_usb_driver);
- schedule_work(&hub->attach_usb_driver_work);
+ schedule_work(&attach_usb_driver_work);
return 0;
}
@@ -285,9 +286,6 @@ static int onboard_hub_remove(struct platform_device *pdev)
hub->going_away = true;
- if (&hub->attach_usb_driver_work != current_work())
- cancel_work_sync(&hub->attach_usb_driver_work);
-
mutex_lock(&hub->lock);
/* unbind the USB devices to avoid dangling references to this device */
@@ -433,13 +431,13 @@ static int __init onboard_hub_init(void)
{
int ret;
- ret = platform_driver_register(&onboard_hub_driver);
+ ret = usb_register_device_driver(&onboard_hub_usbdev_driver, THIS_MODULE);
if (ret)
return ret;
- ret = usb_register_device_driver(&onboard_hub_usbdev_driver, THIS_MODULE);
+ ret = platform_driver_register(&onboard_hub_driver);
if (ret)
- platform_driver_unregister(&onboard_hub_driver);
+ usb_deregister_device_driver(&onboard_hub_usbdev_driver);
return ret;
}
@@ -449,6 +447,8 @@ static void __exit onboard_hub_exit(void)
{
usb_deregister_device_driver(&onboard_hub_usbdev_driver);
platform_driver_unregister(&onboard_hub_driver);
+
+ cancel_work_sync(&attach_usb_driver_work);
}
module_exit(onboard_hub_exit);
diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c
index 476f55d1fec3..44a21ec865fb 100644
--- a/drivers/usb/musb/omap2430.c
+++ b/drivers/usb/musb/omap2430.c
@@ -411,8 +411,10 @@ static int omap2430_probe(struct platform_device *pdev)
memset(musb_res, 0, sizeof(*musb_res) * ARRAY_SIZE(musb_res));
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!res)
+ if (!res) {
+ ret = -EINVAL;
goto err2;
+ }
musb_res[i].start = res->start;
musb_res[i].end = res->end;
diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index 67372acc2352..832ad592b7ef 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -60,6 +60,7 @@ static const struct usb_device_id id_table[] = {
{ USB_DEVICE(0x0846, 0x1100) }, /* NetGear Managed Switch M4100 series, M5300 series, M7100 series */
{ USB_DEVICE(0x08e6, 0x5501) }, /* Gemalto Prox-PU/CU contactless smartcard reader */
{ USB_DEVICE(0x08FD, 0x000A) }, /* Digianswer A/S , ZigBee/802.15.4 MAC Device */
+ { USB_DEVICE(0x0908, 0x0070) }, /* Siemens SCALANCE LPE-9000 USB Serial Console */
{ USB_DEVICE(0x0908, 0x01FF) }, /* Siemens RUGGEDCOM USB Serial Console */
{ USB_DEVICE(0x0988, 0x0578) }, /* Teraoka AD2000 */
{ USB_DEVICE(0x0B00, 0x3070) }, /* Ingenico 3070 */
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index dee79c7d82d5..ee5ac4ef7e16 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -255,10 +255,16 @@ static void option_instat_callback(struct urb *urb);
#define QUECTEL_PRODUCT_EP06 0x0306
#define QUECTEL_PRODUCT_EM05G 0x030a
#define QUECTEL_PRODUCT_EM060K 0x030b
+#define QUECTEL_PRODUCT_EM05G_CS 0x030c
+#define QUECTEL_PRODUCT_EM05CN_SG 0x0310
#define QUECTEL_PRODUCT_EM05G_SG 0x0311
+#define QUECTEL_PRODUCT_EM05CN 0x0312
+#define QUECTEL_PRODUCT_EM05G_GR 0x0313
+#define QUECTEL_PRODUCT_EM05G_RS 0x0314
#define QUECTEL_PRODUCT_EM12 0x0512
#define QUECTEL_PRODUCT_RM500Q 0x0800
#define QUECTEL_PRODUCT_RM520N 0x0801
+#define QUECTEL_PRODUCT_EC200U 0x0901
#define QUECTEL_PRODUCT_EC200S_CN 0x6002
#define QUECTEL_PRODUCT_EC200T 0x6026
#define QUECTEL_PRODUCT_RM500K 0x7001
@@ -1159,8 +1165,18 @@ static const struct usb_device_id option_ids[] = {
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0xff, 0xff),
.driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) },
+ { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05CN, 0xff),
+ .driver_info = RSVD(6) | ZLP },
+ { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05CN_SG, 0xff),
+ .driver_info = RSVD(6) | ZLP },
{ USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G, 0xff),
.driver_info = RSVD(6) | ZLP },
+ { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_CS, 0xff),
+ .driver_info = RSVD(6) | ZLP },
+ { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_GR, 0xff),
+ .driver_info = RSVD(6) | ZLP },
+ { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_RS, 0xff),
+ .driver_info = RSVD(6) | ZLP },
{ USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_SG, 0xff),
.driver_info = RSVD(6) | ZLP },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0x00, 0x40) },
@@ -1180,6 +1196,7 @@ static const struct usb_device_id option_ids[] = {
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM520N, 0xff, 0xff, 0x30) },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM520N, 0xff, 0, 0x40) },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM520N, 0xff, 0, 0) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200U, 0xff, 0, 0) },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200S_CN, 0xff, 0, 0) },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200T, 0xff, 0, 0) },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500K, 0xff, 0x00, 0x00) },
diff --git a/drivers/usb/storage/uas-detect.h b/drivers/usb/storage/uas-detect.h
index 3f720faa6f97..d73282c0ec50 100644
--- a/drivers/usb/storage/uas-detect.h
+++ b/drivers/usb/storage/uas-detect.h
@@ -116,6 +116,19 @@ static int uas_use_uas_driver(struct usb_interface *intf,
if (le16_to_cpu(udev->descriptor.idVendor) == 0x0bc2)
flags |= US_FL_NO_ATA_1X;
+ /*
+ * RTL9210-based enclosure from HIKSEMI, MD202 reportedly have issues
+ * with UAS. This isn't distinguishable with just idVendor and
+ * idProduct, use manufacturer and product too.
+ *
+ * Reported-by: Hongling Zeng <zenghongling@kylinos.cn>
+ */
+ if (le16_to_cpu(udev->descriptor.idVendor) == 0x0bda &&
+ le16_to_cpu(udev->descriptor.idProduct) == 0x9210 &&
+ (udev->manufacturer && !strcmp(udev->manufacturer, "HIKSEMI")) &&
+ (udev->product && !strcmp(udev->product, "MD202")))
+ flags |= US_FL_IGNORE_UAS;
+
usb_stor_adjust_quirks(udev, &flags);
if (flags & US_FL_IGNORE_UAS) {
diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h
index 251778d14e2d..c7b763d6d102 100644
--- a/drivers/usb/storage/unusual_uas.h
+++ b/drivers/usb/storage/unusual_uas.h
@@ -83,13 +83,6 @@ UNUSUAL_DEV(0x0bc2, 0x331a, 0x0000, 0x9999,
USB_SC_DEVICE, USB_PR_DEVICE, NULL,
US_FL_NO_REPORT_LUNS),
-/* Reported-by: Hongling Zeng <zenghongling@kylinos.cn> */
-UNUSUAL_DEV(0x0bda, 0x9210, 0x0000, 0x9999,
- "Hiksemi",
- "External HDD",
- USB_SC_DEVICE, USB_PR_DEVICE, NULL,
- US_FL_IGNORE_UAS),
-
/* Reported-by: Benjamin Tissoires <benjamin.tissoires@redhat.com> */
UNUSUAL_DEV(0x13fd, 0x3940, 0x0000, 0x9999,
"Initio Corporation",
diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c
index de66a2949e33..50b24096eb7f 100644
--- a/drivers/usb/typec/altmodes/displayport.c
+++ b/drivers/usb/typec/altmodes/displayport.c
@@ -419,6 +419,18 @@ static const char * const pin_assignments[] = {
[DP_PIN_ASSIGN_F] = "F",
};
+/*
+ * Helper function to extract a peripheral's currently supported
+ * Pin Assignments from its DisplayPort alternate mode state.
+ */
+static u8 get_current_pin_assignments(struct dp_altmode *dp)
+{
+ if (DP_CONF_CURRENTLY(dp->data.conf) == DP_CONF_UFP_U_AS_DFP_D)
+ return DP_CAP_PIN_ASSIGN_DFP_D(dp->alt->vdo);
+ else
+ return DP_CAP_PIN_ASSIGN_UFP_D(dp->alt->vdo);
+}
+
static ssize_t
pin_assignment_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t size)
@@ -445,10 +457,7 @@ pin_assignment_store(struct device *dev, struct device_attribute *attr,
goto out_unlock;
}
- if (DP_CONF_CURRENTLY(dp->data.conf) == DP_CONF_DFP_D)
- assignments = DP_CAP_UFP_D_PIN_ASSIGN(dp->alt->vdo);
- else
- assignments = DP_CAP_DFP_D_PIN_ASSIGN(dp->alt->vdo);
+ assignments = get_current_pin_assignments(dp);
if (!(DP_CONF_GET_PIN_ASSIGN(conf) & assignments)) {
ret = -EINVAL;
@@ -485,10 +494,7 @@ static ssize_t pin_assignment_show(struct device *dev,
cur = get_count_order(DP_CONF_GET_PIN_ASSIGN(dp->data.conf));
- if (DP_CONF_CURRENTLY(dp->data.conf) == DP_CONF_DFP_D)
- assignments = DP_CAP_UFP_D_PIN_ASSIGN(dp->alt->vdo);
- else
- assignments = DP_CAP_DFP_D_PIN_ASSIGN(dp->alt->vdo);
+ assignments = get_current_pin_assignments(dp);
for (i = 0; assignments; assignments >>= 1, i++) {
if (assignments & 1) {
@@ -529,10 +535,10 @@ int dp_altmode_probe(struct typec_altmode *alt)
/* FIXME: Port can only be DFP_U. */
/* Make sure we have compatiple pin configurations */
- if (!(DP_CAP_DFP_D_PIN_ASSIGN(port->vdo) &
- DP_CAP_UFP_D_PIN_ASSIGN(alt->vdo)) &&
- !(DP_CAP_UFP_D_PIN_ASSIGN(port->vdo) &
- DP_CAP_DFP_D_PIN_ASSIGN(alt->vdo)))
+ if (!(DP_CAP_PIN_ASSIGN_DFP_D(port->vdo) &
+ DP_CAP_PIN_ASSIGN_UFP_D(alt->vdo)) &&
+ !(DP_CAP_PIN_ASSIGN_UFP_D(port->vdo) &
+ DP_CAP_PIN_ASSIGN_DFP_D(alt->vdo)))
return -ENODEV;
ret = sysfs_create_group(&alt->dev.kobj, &dp_altmode_group);
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 904c7b4ce2f0..59b366b5c614 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -4594,14 +4594,13 @@ static void run_state_machine(struct tcpm_port *port)
tcpm_set_state(port, ready_state(port), 0);
break;
case DR_SWAP_CHANGE_DR:
- if (port->data_role == TYPEC_HOST) {
- tcpm_unregister_altmodes(port);
+ tcpm_unregister_altmodes(port);
+ if (port->data_role == TYPEC_HOST)
tcpm_set_roles(port, true, port->pwr_role,
TYPEC_DEVICE);
- } else {
+ else
tcpm_set_roles(port, true, port->pwr_role,
TYPEC_HOST);
- }
tcpm_ams_finish(port);
tcpm_set_state(port, ready_state(port), 0);
break;
diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c
index eabe519013e7..1cf8947c6d66 100644
--- a/drivers/usb/typec/ucsi/ucsi.c
+++ b/drivers/usb/typec/ucsi/ucsi.c
@@ -187,6 +187,7 @@ EXPORT_SYMBOL_GPL(ucsi_send_command);
struct ucsi_work {
struct delayed_work work;
+ struct list_head node;
unsigned long delay;
unsigned int count;
struct ucsi_connector *con;
@@ -202,6 +203,7 @@ static void ucsi_poll_worker(struct work_struct *work)
mutex_lock(&con->lock);
if (!con->partner) {
+ list_del(&uwork->node);
mutex_unlock(&con->lock);
kfree(uwork);
return;
@@ -209,10 +211,12 @@ static void ucsi_poll_worker(struct work_struct *work)
ret = uwork->cb(con);
- if (uwork->count-- && (ret == -EBUSY || ret == -ETIMEDOUT))
+ if (uwork->count-- && (ret == -EBUSY || ret == -ETIMEDOUT)) {
queue_delayed_work(con->wq, &uwork->work, uwork->delay);
- else
+ } else {
+ list_del(&uwork->node);
kfree(uwork);
+ }
mutex_unlock(&con->lock);
}
@@ -236,6 +240,7 @@ static int ucsi_partner_task(struct ucsi_connector *con,
uwork->con = con;
uwork->cb = cb;
+ list_add_tail(&uwork->node, &con->partner_tasks);
queue_delayed_work(con->wq, &uwork->work, delay);
return 0;
@@ -1056,6 +1061,7 @@ static int ucsi_register_port(struct ucsi *ucsi, int index)
INIT_WORK(&con->work, ucsi_handle_connector_change);
init_completion(&con->complete);
mutex_init(&con->lock);
+ INIT_LIST_HEAD(&con->partner_tasks);
con->num = index + 1;
con->ucsi = ucsi;
@@ -1263,6 +1269,9 @@ err_unregister:
con->port = NULL;
}
+ kfree(ucsi->connector);
+ ucsi->connector = NULL;
+
err_reset:
memset(&ucsi->cap, 0, sizeof(ucsi->cap));
ucsi_reset_ppm(ucsi);
@@ -1294,7 +1303,8 @@ static void ucsi_resume_work(struct work_struct *work)
int ucsi_resume(struct ucsi *ucsi)
{
- queue_work(system_long_wq, &ucsi->resume_work);
+ if (ucsi->connector)
+ queue_work(system_long_wq, &ucsi->resume_work);
return 0;
}
EXPORT_SYMBOL_GPL(ucsi_resume);
@@ -1414,14 +1424,29 @@ void ucsi_unregister(struct ucsi *ucsi)
/* Disable notifications */
ucsi->ops->async_write(ucsi, UCSI_CONTROL, &cmd, sizeof(cmd));
+ if (!ucsi->connector)
+ return;
+
for (i = 0; i < ucsi->cap.num_connectors; i++) {
cancel_work_sync(&ucsi->connector[i].work);
ucsi_unregister_partner(&ucsi->connector[i]);
ucsi_unregister_altmodes(&ucsi->connector[i],
UCSI_RECIPIENT_CON);
ucsi_unregister_port_psy(&ucsi->connector[i]);
- if (ucsi->connector[i].wq)
+
+ if (ucsi->connector[i].wq) {
+ struct ucsi_work *uwork;
+
+ mutex_lock(&ucsi->connector[i].lock);
+ /*
+ * queue delayed items immediately so they can execute
+ * and free themselves before the wq is destroyed
+ */
+ list_for_each_entry(uwork, &ucsi->connector[i].partner_tasks, node)
+ mod_delayed_work(ucsi->connector[i].wq, &uwork->work, 0);
+ mutex_unlock(&ucsi->connector[i].lock);
destroy_workqueue(ucsi->connector[i].wq);
+ }
typec_unregister_port(ucsi->connector[i].port);
}
diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h
index c968474ee547..60ce9fb6e745 100644
--- a/drivers/usb/typec/ucsi/ucsi.h
+++ b/drivers/usb/typec/ucsi/ucsi.h
@@ -322,6 +322,7 @@ struct ucsi_connector {
struct work_struct work;
struct completion complete;
struct workqueue_struct *wq;
+ struct list_head partner_tasks;
struct typec_port *port;
struct typec_partner *partner;
diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
index f9c0044c6442..44b29289aa19 100644
--- a/drivers/vdpa/ifcvf/ifcvf_main.c
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -849,7 +849,7 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
ret = ifcvf_init_hw(vf, pdev);
if (ret) {
IFCVF_ERR(pdev, "Failed to init IFCVF hw\n");
- return ret;
+ goto err;
}
for (i = 0; i < vf->nr_vring; i++)
diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
index 6af9fdbb86b7..058fbe28107e 100644
--- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
@@ -116,8 +116,9 @@ int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, u32 *mkey, u32 *in,
int inlen);
int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, u32 mkey);
int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
- bool *change_map);
-int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb);
+ bool *change_map, unsigned int asid);
+int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
+ unsigned int asid);
void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev);
#define mlx5_vdpa_warn(__dev, format, ...) \
diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
index a639b9208d41..0a1e0b0dc37e 100644
--- a/drivers/vdpa/mlx5/core/mr.c
+++ b/drivers/vdpa/mlx5/core/mr.c
@@ -311,7 +311,6 @@ static int add_direct_chain(struct mlx5_vdpa_dev *mvdev, u64 start, u64 size, u8
u64 st;
u64 sz;
int err;
- int i = 0;
st = start;
while (size) {
@@ -336,7 +335,6 @@ static int add_direct_chain(struct mlx5_vdpa_dev *mvdev, u64 start, u64 size, u8
mr->num_directs++;
mr->num_klms++;
st += sz;
- i++;
}
list_splice_tail(&tmp, &mr->head);
return 0;
@@ -511,7 +509,8 @@ out:
mutex_unlock(&mr->mkey_mtx);
}
-static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
+static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
+ struct vhost_iotlb *iotlb, unsigned int asid)
{
struct mlx5_vdpa_mr *mr = &mvdev->mr;
int err;
@@ -519,42 +518,49 @@ static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb
if (mr->initialized)
return 0;
- if (iotlb)
- err = create_user_mr(mvdev, iotlb);
- else
- err = create_dma_mr(mvdev, mr);
+ if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
+ if (iotlb)
+ err = create_user_mr(mvdev, iotlb);
+ else
+ err = create_dma_mr(mvdev, mr);
- if (err)
- return err;
+ if (err)
+ return err;
+ }
- err = dup_iotlb(mvdev, iotlb);
- if (err)
- goto out_err;
+ if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) {
+ err = dup_iotlb(mvdev, iotlb);
+ if (err)
+ goto out_err;
+ }
mr->initialized = true;
return 0;
out_err:
- if (iotlb)
- destroy_user_mr(mvdev, mr);
- else
- destroy_dma_mr(mvdev, mr);
+ if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
+ if (iotlb)
+ destroy_user_mr(mvdev, mr);
+ else
+ destroy_dma_mr(mvdev, mr);
+ }
return err;
}
-int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
+int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
+ unsigned int asid)
{
int err;
mutex_lock(&mvdev->mr.mkey_mtx);
- err = _mlx5_vdpa_create_mr(mvdev, iotlb);
+ err = _mlx5_vdpa_create_mr(mvdev, iotlb, asid);
mutex_unlock(&mvdev->mr.mkey_mtx);
return err;
}
int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
- bool *change_map)
+ bool *change_map, unsigned int asid)
{
struct mlx5_vdpa_mr *mr = &mvdev->mr;
int err = 0;
@@ -566,7 +572,7 @@ int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *io
*change_map = true;
}
if (!*change_map)
- err = _mlx5_vdpa_create_mr(mvdev, iotlb);
+ err = _mlx5_vdpa_create_mr(mvdev, iotlb, asid);
mutex_unlock(&mr->mkey_mtx);
return err;
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 90913365def4..3a6dbbc6440d 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -1468,11 +1468,13 @@ static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
eth_broadcast_addr(dmac_c);
ether_addr_copy(dmac_v, mac);
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
+ if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid);
+ }
if (tagged) {
MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
- MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid);
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, vid);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid);
}
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
@@ -1684,7 +1686,7 @@ static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
/* Need recreate the flow table entry, so that the packet could forward back
*/
- mac_vlan_del(ndev, ndev->config.mac, 0, false);
+ mac_vlan_del(ndev, mac_back, 0, false);
if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");
@@ -1821,6 +1823,9 @@ static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd)
size_t read;
u16 id;
+ if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)))
+ return status;
+
switch (cmd) {
case VIRTIO_NET_CTRL_VLAN_ADD:
read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
@@ -2389,7 +2394,8 @@ static void restore_channels_info(struct mlx5_vdpa_net *ndev)
}
}
-static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
+static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
+ struct vhost_iotlb *iotlb, unsigned int asid)
{
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
int err;
@@ -2401,7 +2407,7 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb
teardown_driver(ndev);
mlx5_vdpa_destroy_mr(mvdev);
- err = mlx5_vdpa_create_mr(mvdev, iotlb);
+ err = mlx5_vdpa_create_mr(mvdev, iotlb, asid);
if (err)
goto err_mr;
@@ -2582,7 +2588,7 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
++mvdev->generation;
if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
- if (mlx5_vdpa_create_mr(mvdev, NULL))
+ if (mlx5_vdpa_create_mr(mvdev, NULL, 0))
mlx5_vdpa_warn(mvdev, "create MR failed\n");
}
up_write(&ndev->reslock);
@@ -2618,41 +2624,20 @@ static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
return mvdev->generation;
}
-static int set_map_control(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
-{
- u64 start = 0ULL, last = 0ULL - 1;
- struct vhost_iotlb_map *map;
- int err = 0;
-
- spin_lock(&mvdev->cvq.iommu_lock);
- vhost_iotlb_reset(mvdev->cvq.iotlb);
-
- for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
- map = vhost_iotlb_itree_next(map, start, last)) {
- err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start,
- map->last, map->addr, map->perm);
- if (err)
- goto out;
- }
-
-out:
- spin_unlock(&mvdev->cvq.iommu_lock);
- return err;
-}
-
-static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
+static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
+ unsigned int asid)
{
bool change_map;
int err;
- err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map);
+ err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map, asid);
if (err) {
mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
return err;
}
if (change_map)
- err = mlx5_vdpa_change_map(mvdev, iotlb);
+ err = mlx5_vdpa_change_map(mvdev, iotlb, asid);
return err;
}
@@ -2665,16 +2650,7 @@ static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
int err = -EINVAL;
down_write(&ndev->reslock);
- if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
- err = set_map_data(mvdev, iotlb);
- if (err)
- goto out;
- }
-
- if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid)
- err = set_map_control(mvdev, iotlb);
-
-out:
+ err = set_map_data(mvdev, iotlb, asid);
up_write(&ndev->reslock);
return err;
}
@@ -2840,8 +2816,8 @@ static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
int i;
down_write(&ndev->reslock);
- mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
ndev->nb_registered = false;
+ mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
flush_workqueue(ndev->mvdev.wq);
for (i = 0; i < ndev->cur_num_vqs; i++) {
mvq = &ndev->vqs[i];
@@ -3019,7 +2995,7 @@ static void update_carrier(struct work_struct *work)
else
ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
- if (ndev->config_cb.callback)
+ if (ndev->nb_registered && ndev->config_cb.callback)
ndev->config_cb.callback(ndev->config_cb.private);
kfree(wqent);
@@ -3036,21 +3012,13 @@ static int event_handler(struct notifier_block *nb, unsigned long event, void *p
switch (eqe->sub_type) {
case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
- down_read(&ndev->reslock);
- if (!ndev->nb_registered) {
- up_read(&ndev->reslock);
- return NOTIFY_DONE;
- }
wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
- if (!wqent) {
- up_read(&ndev->reslock);
+ if (!wqent)
return NOTIFY_DONE;
- }
wqent->mvdev = &ndev->mvdev;
INIT_WORK(&wqent->work, update_carrier);
queue_work(ndev->mvdev.wq, &wqent->work);
- up_read(&ndev->reslock);
ret = NOTIFY_OK;
break;
default:
@@ -3185,7 +3153,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
goto err_mpfs;
if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
- err = mlx5_vdpa_create_mr(mvdev, NULL);
+ err = mlx5_vdpa_create_mr(mvdev, NULL, 0);
if (err)
goto err_res;
}
@@ -3237,8 +3205,8 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *
struct workqueue_struct *wq;
if (ndev->nb_registered) {
- mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
ndev->nb_registered = false;
+ mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
}
wq = mvdev->wq;
mvdev->wq = NULL;
diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
index febdc99b51a7..8ef7aa1365cc 100644
--- a/drivers/vdpa/vdpa.c
+++ b/drivers/vdpa/vdpa.c
@@ -855,7 +855,7 @@ static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *ms
features_device = vdev->config->get_device_features(vdev);
- if (nla_put_u64_64bit(msg, VDPA_ATTR_VDPA_DEV_SUPPORTED_FEATURES, features_device,
+ if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_FEATURES, features_device,
VDPA_ATTR_PAD))
return -EMSGSIZE;
@@ -935,7 +935,6 @@ static int vdpa_fill_stats_rec(struct vdpa_device *vdev, struct sk_buff *msg,
{
struct virtio_net_config config = {};
u64 features;
- u16 max_vqp;
u8 status;
int err;
@@ -946,15 +945,15 @@ static int vdpa_fill_stats_rec(struct vdpa_device *vdev, struct sk_buff *msg,
}
vdpa_get_config_unlocked(vdev, 0, &config, sizeof(config));
- max_vqp = __virtio16_to_cpu(true, config.max_virtqueue_pairs);
- if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MAX_VQP, max_vqp))
- return -EMSGSIZE;
-
features = vdev->config->get_driver_features(vdev);
if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_NEGOTIATED_FEATURES,
features, VDPA_ATTR_PAD))
return -EMSGSIZE;
+ err = vdpa_dev_net_mq_config_fill(msg, features, &config);
+ if (err)
+ return err;
+
if (nla_put_u32(msg, VDPA_ATTR_DEV_QUEUE_INDEX, index))
return -EMSGSIZE;
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index b071f0d842fb..cb88891b44a8 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -67,8 +67,7 @@ static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx)
{
struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
- vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features,
- VDPASIM_QUEUE_MAX, false,
+ vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, false,
(struct vring_desc *)(uintptr_t)vq->desc_addr,
(struct vring_avail *)
(uintptr_t)vq->driver_addr,
@@ -690,7 +689,9 @@ static void vdpasim_free(struct vdpa_device *vdpa)
}
kvfree(vdpasim->buffer);
- vhost_iotlb_free(vdpasim->iommu);
+ for (i = 0; i < vdpasim->dev_attr.nas; i++)
+ vhost_iotlb_reset(&vdpasim->iommu[i]);
+ kfree(vdpasim->iommu);
kfree(vdpasim->vqs);
kfree(vdpasim->config);
}
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
index c6db1a1baf76..f745926237a8 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
@@ -427,8 +427,10 @@ static int __init vdpasim_blk_init(void)
int ret;
ret = device_register(&vdpasim_blk_mgmtdev);
- if (ret)
+ if (ret) {
+ put_device(&vdpasim_blk_mgmtdev);
return ret;
+ }
ret = vdpa_mgmtdev_register(&mgmt_dev);
if (ret)
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
index c3cb225ea469..584b975a98a7 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
@@ -62,6 +62,9 @@ static bool receive_filter(struct vdpasim *vdpasim, size_t len)
if (len < ETH_ALEN + hdr_len)
return false;
+ if (is_broadcast_ether_addr(vdpasim->buffer + hdr_len) ||
+ is_multicast_ether_addr(vdpasim->buffer + hdr_len))
+ return true;
if (!strncmp(vdpasim->buffer + hdr_len, vio_config->mac, ETH_ALEN))
return true;
@@ -305,8 +308,10 @@ static int __init vdpasim_net_init(void)
int ret;
ret = device_register(&vdpasim_net_mgmtdev);
- if (ret)
+ if (ret) {
+ put_device(&vdpasim_net_mgmtdev);
return ret;
+ }
ret = vdpa_mgmtdev_register(&mgmt_dev);
if (ret)
diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
index 0dd3c1f291da..0c3b48616a9f 100644
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -1440,6 +1440,9 @@ static bool vduse_validate_config(struct vduse_dev_config *config)
if (config->config_size > PAGE_SIZE)
return false;
+ if (config->vq_num > 0xffff)
+ return false;
+
if (!device_is_allowed(config->device_id))
return false;
diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c
index d448db0c4de3..8fe267ca3e76 100644
--- a/drivers/vdpa/virtio_pci/vp_vdpa.c
+++ b/drivers/vdpa/virtio_pci/vp_vdpa.c
@@ -647,7 +647,7 @@ static void vp_vdpa_remove(struct pci_dev *pdev)
mdev = vp_vdpa_mgtdev->mdev;
vp_modern_remove(mdev);
vdpa_mgmtdev_unregister(&vp_vdpa_mgtdev->mgtdev);
- kfree(&vp_vdpa_mgtdev->mgtdev.id_table);
+ kfree(vp_vdpa_mgtdev->mgtdev.id_table);
kfree(mdev);
kfree(vp_vdpa_mgtdev);
}
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 23c24fe98c00..2209372f236d 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -1856,24 +1856,33 @@ unwind:
* significantly boosts non-hugetlbfs mappings and doesn't seem to hurt when
* hugetlbfs is in use.
*/
-static void vfio_test_domain_fgsp(struct vfio_domain *domain)
+static void vfio_test_domain_fgsp(struct vfio_domain *domain, struct list_head *regions)
{
- struct page *pages;
int ret, order = get_order(PAGE_SIZE * 2);
+ struct vfio_iova *region;
+ struct page *pages;
+ dma_addr_t start;
pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
if (!pages)
return;
- ret = iommu_map(domain->domain, 0, page_to_phys(pages), PAGE_SIZE * 2,
- IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE);
- if (!ret) {
- size_t unmapped = iommu_unmap(domain->domain, 0, PAGE_SIZE);
+ list_for_each_entry(region, regions, list) {
+ start = ALIGN(region->start, PAGE_SIZE * 2);
+ if (start >= region->end || (region->end - start < PAGE_SIZE * 2))
+ continue;
- if (unmapped == PAGE_SIZE)
- iommu_unmap(domain->domain, PAGE_SIZE, PAGE_SIZE);
- else
- domain->fgsp = true;
+ ret = iommu_map(domain->domain, start, page_to_phys(pages), PAGE_SIZE * 2,
+ IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE);
+ if (!ret) {
+ size_t unmapped = iommu_unmap(domain->domain, start, PAGE_SIZE);
+
+ if (unmapped == PAGE_SIZE)
+ iommu_unmap(domain->domain, start + PAGE_SIZE, PAGE_SIZE);
+ else
+ domain->fgsp = true;
+ }
+ break;
}
__free_pages(pages, order);
@@ -2326,7 +2335,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
}
}
- vfio_test_domain_fgsp(domain);
+ vfio_test_domain_fgsp(domain, &iova_copy);
/* replay mappings on new domains */
ret = vfio_iommu_replay(iommu, domain);
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 9af19b0cf3b7..4c538b30fd76 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -1511,6 +1511,9 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
nvq = &n->vqs[index];
mutex_lock(&vq->mutex);
+ if (fd == -1)
+ vhost_clear_msg(&n->dev);
+
/* Verify that ring has been setup correctly. */
if (!vhost_vq_access_ok(vq)) {
r = -EFAULT;
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index dca6346d75b3..d5ecb8876fc9 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -80,7 +80,7 @@ struct vhost_scsi_cmd {
struct scatterlist *tvc_prot_sgl;
struct page **tvc_upages;
/* Pointer to response header iovec */
- struct iovec tvc_resp_iov;
+ struct iovec *tvc_resp_iov;
/* Pointer to vhost_scsi for our device */
struct vhost_scsi *tvc_vhost;
/* Pointer to vhost_virtqueue for the cmd */
@@ -563,7 +563,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
memcpy(v_rsp.sense, cmd->tvc_sense_buf,
se_cmd->scsi_sense_length);
- iov_iter_init(&iov_iter, ITER_DEST, &cmd->tvc_resp_iov,
+ iov_iter_init(&iov_iter, ITER_DEST, cmd->tvc_resp_iov,
cmd->tvc_in_iovs, sizeof(v_rsp));
ret = copy_to_iter(&v_rsp, sizeof(v_rsp), &iov_iter);
if (likely(ret == sizeof(v_rsp))) {
@@ -594,6 +594,7 @@ vhost_scsi_get_cmd(struct vhost_virtqueue *vq, struct vhost_scsi_tpg *tpg,
struct vhost_scsi_cmd *cmd;
struct vhost_scsi_nexus *tv_nexus;
struct scatterlist *sg, *prot_sg;
+ struct iovec *tvc_resp_iov;
struct page **pages;
int tag;
@@ -613,6 +614,7 @@ vhost_scsi_get_cmd(struct vhost_virtqueue *vq, struct vhost_scsi_tpg *tpg,
sg = cmd->tvc_sgl;
prot_sg = cmd->tvc_prot_sgl;
pages = cmd->tvc_upages;
+ tvc_resp_iov = cmd->tvc_resp_iov;
memset(cmd, 0, sizeof(*cmd));
cmd->tvc_sgl = sg;
cmd->tvc_prot_sgl = prot_sg;
@@ -625,6 +627,7 @@ vhost_scsi_get_cmd(struct vhost_virtqueue *vq, struct vhost_scsi_tpg *tpg,
cmd->tvc_data_direction = data_direction;
cmd->tvc_nexus = tv_nexus;
cmd->inflight = vhost_scsi_get_inflight(vq);
+ cmd->tvc_resp_iov = tvc_resp_iov;
memcpy(cmd->tvc_cdb, cdb, VHOST_SCSI_MAX_CDB_SIZE);
@@ -935,7 +938,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
struct iov_iter in_iter, prot_iter, data_iter;
u64 tag;
u32 exp_data_len, data_direction;
- int ret, prot_bytes, c = 0;
+ int ret, prot_bytes, i, c = 0;
u16 lun;
u8 task_attr;
bool t10_pi = vhost_has_feature(vq, VIRTIO_SCSI_F_T10_PI);
@@ -1092,7 +1095,8 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
}
cmd->tvc_vhost = vs;
cmd->tvc_vq = vq;
- cmd->tvc_resp_iov = vq->iov[vc.out];
+ for (i = 0; i < vc.in ; i++)
+ cmd->tvc_resp_iov[i] = vq->iov[vc.out + i];
cmd->tvc_in_iovs = vc.in;
pr_debug("vhost_scsi got command opcode: %#02x, lun: %d\n",
@@ -1461,6 +1465,7 @@ static void vhost_scsi_destroy_vq_cmds(struct vhost_virtqueue *vq)
kfree(tv_cmd->tvc_sgl);
kfree(tv_cmd->tvc_prot_sgl);
kfree(tv_cmd->tvc_upages);
+ kfree(tv_cmd->tvc_resp_iov);
}
sbitmap_free(&svq->scsi_tags);
@@ -1508,6 +1513,14 @@ static int vhost_scsi_setup_vq_cmds(struct vhost_virtqueue *vq, int max_cmds)
goto out;
}
+ tv_cmd->tvc_resp_iov = kcalloc(UIO_MAXIOV,
+ sizeof(struct iovec),
+ GFP_KERNEL);
+ if (!tv_cmd->tvc_resp_iov) {
+ pr_err("Unable to allocate tv_cmd->tvc_resp_iov\n");
+ goto out;
+ }
+
tv_cmd->tvc_prot_sgl = kcalloc(VHOST_SCSI_PREALLOC_PROT_SGLS,
sizeof(struct scatterlist),
GFP_KERNEL);
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 166044642fd5..ec32f785dfde 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -65,6 +65,10 @@ static DEFINE_IDA(vhost_vdpa_ida);
static dev_t vhost_vdpa_major;
+static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v,
+ struct vhost_iotlb *iotlb, u64 start,
+ u64 last, u32 asid);
+
static inline u32 iotlb_to_asid(struct vhost_iotlb *iotlb)
{
struct vhost_vdpa_as *as = container_of(iotlb, struct
@@ -135,7 +139,7 @@ static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid)
return -EINVAL;
hlist_del(&as->hash_link);
- vhost_iotlb_reset(&as->iotlb);
+ vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1, asid);
kfree(as);
return 0;
@@ -683,10 +687,20 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
mutex_unlock(&d->mutex);
return r;
}
+static void vhost_vdpa_general_unmap(struct vhost_vdpa *v,
+ struct vhost_iotlb_map *map, u32 asid)
+{
+ struct vdpa_device *vdpa = v->vdpa;
+ const struct vdpa_config_ops *ops = vdpa->config;
+ if (ops->dma_map) {
+ ops->dma_unmap(vdpa, asid, map->start, map->size);
+ } else if (ops->set_map == NULL) {
+ iommu_unmap(v->domain, map->start, map->size);
+ }
+}
-static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v,
- struct vhost_iotlb *iotlb,
- u64 start, u64 last)
+static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
+ u64 start, u64 last, u32 asid)
{
struct vhost_dev *dev = &v->vdev;
struct vhost_iotlb_map *map;
@@ -703,13 +717,13 @@ static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v,
unpin_user_page(page);
}
atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm);
+ vhost_vdpa_general_unmap(v, map, asid);
vhost_iotlb_map_free(iotlb, map);
}
}
-static void vhost_vdpa_va_unmap(struct vhost_vdpa *v,
- struct vhost_iotlb *iotlb,
- u64 start, u64 last)
+static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
+ u64 start, u64 last, u32 asid)
{
struct vhost_iotlb_map *map;
struct vdpa_map_file *map_file;
@@ -718,20 +732,21 @@ static void vhost_vdpa_va_unmap(struct vhost_vdpa *v,
map_file = (struct vdpa_map_file *)map->opaque;
fput(map_file->file);
kfree(map_file);
+ vhost_vdpa_general_unmap(v, map, asid);
vhost_iotlb_map_free(iotlb, map);
}
}
static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v,
- struct vhost_iotlb *iotlb,
- u64 start, u64 last)
+ struct vhost_iotlb *iotlb, u64 start,
+ u64 last, u32 asid)
{
struct vdpa_device *vdpa = v->vdpa;
if (vdpa->use_va)
- return vhost_vdpa_va_unmap(v, iotlb, start, last);
+ return vhost_vdpa_va_unmap(v, iotlb, start, last, asid);
- return vhost_vdpa_pa_unmap(v, iotlb, start, last);
+ return vhost_vdpa_pa_unmap(v, iotlb, start, last, asid);
}
static int perm_to_iommu_flags(u32 perm)
@@ -798,17 +813,12 @@ static void vhost_vdpa_unmap(struct vhost_vdpa *v,
const struct vdpa_config_ops *ops = vdpa->config;
u32 asid = iotlb_to_asid(iotlb);
- vhost_vdpa_iotlb_unmap(v, iotlb, iova, iova + size - 1);
+ vhost_vdpa_iotlb_unmap(v, iotlb, iova, iova + size - 1, asid);
- if (ops->dma_map) {
- ops->dma_unmap(vdpa, asid, iova, size);
- } else if (ops->set_map) {
+ if (ops->set_map) {
if (!v->in_batch)
ops->set_map(vdpa, asid, iotlb);
- } else {
- iommu_unmap(v->domain, iova, size);
}
-
/* If we are in the middle of batch processing, delay the free
* of AS until BATCH_END.
*/
@@ -1162,14 +1172,14 @@ static void vhost_vdpa_cleanup(struct vhost_vdpa *v)
struct vhost_vdpa_as *as;
u32 asid;
- vhost_dev_cleanup(&v->vdev);
- kfree(v->vdev.vqs);
-
for (asid = 0; asid < v->vdpa->nas; asid++) {
as = asid_to_as(v, asid);
if (as)
vhost_vdpa_remove_as(v, asid);
}
+
+ vhost_dev_cleanup(&v->vdev);
+ kfree(v->vdev.vqs);
}
static int vhost_vdpa_open(struct inode *inode, struct file *filep)
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 5c9fe3c9c364..43c9770b86e5 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -661,7 +661,7 @@ void vhost_dev_stop(struct vhost_dev *dev)
}
EXPORT_SYMBOL_GPL(vhost_dev_stop);
-static void vhost_clear_msg(struct vhost_dev *dev)
+void vhost_clear_msg(struct vhost_dev *dev)
{
struct vhost_msg_node *node, *n;
@@ -679,6 +679,7 @@ static void vhost_clear_msg(struct vhost_dev *dev)
spin_unlock(&dev->iotlb_lock);
}
+EXPORT_SYMBOL_GPL(vhost_clear_msg);
void vhost_dev_cleanup(struct vhost_dev *dev)
{
@@ -2053,7 +2054,7 @@ static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
struct vhost_dev *dev = vq->dev;
struct vhost_iotlb *umem = dev->iotlb ? dev->iotlb : dev->umem;
struct iovec *_iov;
- u64 s = 0;
+ u64 s = 0, last = addr + len - 1;
int ret = 0;
while ((u64)len > s) {
@@ -2063,7 +2064,7 @@ static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
break;
}
- map = vhost_iotlb_itree_first(umem, addr, addr + len - 1);
+ map = vhost_iotlb_itree_first(umem, addr, last);
if (map == NULL || map->start > addr) {
if (umem != dev->iotlb) {
ret = -EFAULT;
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index d9109107af08..790b296271f1 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -181,6 +181,7 @@ long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, void __user *argp);
long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp);
bool vhost_vq_access_ok(struct vhost_virtqueue *vq);
bool vhost_log_access_ok(struct vhost_dev *);
+void vhost_clear_msg(struct vhost_dev *dev);
int vhost_get_vq_desc(struct vhost_virtqueue *,
struct iovec iov[], unsigned int iov_count,
diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
index c9f5c8ea3afb..33eb941fcf15 100644
--- a/drivers/vhost/vringh.c
+++ b/drivers/vhost/vringh.c
@@ -1102,7 +1102,7 @@ static int iotlb_translate(const struct vringh *vrh,
struct vhost_iotlb_map *map;
struct vhost_iotlb *iotlb = vrh->iotlb;
int ret = 0;
- u64 s = 0;
+ u64 s = 0, last = addr + len - 1;
spin_lock(vrh->iotlb_lock);
@@ -1114,8 +1114,7 @@ static int iotlb_translate(const struct vringh *vrh,
break;
}
- map = vhost_iotlb_itree_first(iotlb, addr,
- addr + len - 1);
+ map = vhost_iotlb_itree_first(iotlb, addr, last);
if (!map || map->start > addr) {
ret = -EINVAL;
break;
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index cd6f7776013a..a2b374372363 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -959,7 +959,14 @@ static int __init vhost_vsock_init(void)
VSOCK_TRANSPORT_F_H2G);
if (ret < 0)
return ret;
- return misc_register(&vhost_vsock_misc);
+
+ ret = misc_register(&vhost_vsock_misc);
+ if (ret) {
+ vsock_core_unregister(&vhost_transport.transport);
+ return ret;
+ }
+
+ return 0;
};
static void __exit vhost_vsock_exit(void)
diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index df6e09f7d242..b2bed599e6c6 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -456,8 +456,8 @@ config FB_ATARI
chipset found in Ataris.
config FB_OF
- bool "Open Firmware frame buffer device support"
- depends on (FB = y) && PPC && (!PPC_PSERIES || PCI)
+ tristate "Open Firmware frame buffer device support"
+ depends on FB && PPC && (!PPC_PSERIES || PCI)
depends on !DRM_OFDRM
select APERTURE_HELPERS
select FB_CFB_FILLRECT
diff --git a/drivers/video/fbdev/atmel_lcdfb.c b/drivers/video/fbdev/atmel_lcdfb.c
index 1fc8de4ecbeb..8187a7c4f910 100644
--- a/drivers/video/fbdev/atmel_lcdfb.c
+++ b/drivers/video/fbdev/atmel_lcdfb.c
@@ -49,7 +49,6 @@ struct atmel_lcdfb_info {
struct clk *lcdc_clk;
struct backlight_device *backlight;
- u8 bl_power;
u8 saved_lcdcon;
u32 pseudo_palette[16];
@@ -109,22 +108,7 @@ static u32 contrast_ctr = ATMEL_LCDC_PS_DIV8
static int atmel_bl_update_status(struct backlight_device *bl)
{
struct atmel_lcdfb_info *sinfo = bl_get_data(bl);
- int power = sinfo->bl_power;
- int brightness = bl->props.brightness;
-
- /* REVISIT there may be a meaningful difference between
- * fb_blank and power ... there seem to be some cases
- * this doesn't handle correctly.
- */
- if (bl->props.fb_blank != sinfo->bl_power)
- power = bl->props.fb_blank;
- else if (bl->props.power != sinfo->bl_power)
- power = bl->props.power;
-
- if (brightness < 0 && power == FB_BLANK_UNBLANK)
- brightness = lcdc_readl(sinfo, ATMEL_LCDC_CONTRAST_VAL);
- else if (power != FB_BLANK_UNBLANK)
- brightness = 0;
+ int brightness = backlight_get_brightness(bl);
lcdc_writel(sinfo, ATMEL_LCDC_CONTRAST_VAL, brightness);
if (contrast_ctr & ATMEL_LCDC_POL_POSITIVE)
@@ -133,8 +117,6 @@ static int atmel_bl_update_status(struct backlight_device *bl)
else
lcdc_writel(sinfo, ATMEL_LCDC_CONTRAST_CTR, contrast_ctr);
- bl->props.fb_blank = bl->props.power = sinfo->bl_power = power;
-
return 0;
}
@@ -155,8 +137,6 @@ static void init_backlight(struct atmel_lcdfb_info *sinfo)
struct backlight_properties props;
struct backlight_device *bl;
- sinfo->bl_power = FB_BLANK_UNBLANK;
-
if (sinfo->backlight)
return;
diff --git a/drivers/video/fbdev/aty/aty128fb.c b/drivers/video/fbdev/aty/aty128fb.c
index dd31b9d7d337..36a9ac05a340 100644
--- a/drivers/video/fbdev/aty/aty128fb.c
+++ b/drivers/video/fbdev/aty/aty128fb.c
@@ -1766,12 +1766,10 @@ static int aty128_bl_update_status(struct backlight_device *bd)
unsigned int reg = aty_ld_le32(LVDS_GEN_CNTL);
int level;
- if (bd->props.power != FB_BLANK_UNBLANK ||
- bd->props.fb_blank != FB_BLANK_UNBLANK ||
- !par->lcd_on)
+ if (!par->lcd_on)
level = 0;
else
- level = bd->props.brightness;
+ level = backlight_get_brightness(bd);
reg |= LVDS_BL_MOD_EN | LVDS_BLON;
if (level > 0) {
diff --git a/drivers/video/fbdev/aty/atyfb_base.c b/drivers/video/fbdev/aty/atyfb_base.c
index 0ccf5d401ecb..b02e4e645035 100644
--- a/drivers/video/fbdev/aty/atyfb_base.c
+++ b/drivers/video/fbdev/aty/atyfb_base.c
@@ -2219,13 +2219,7 @@ static int aty_bl_update_status(struct backlight_device *bd)
{
struct atyfb_par *par = bl_get_data(bd);
unsigned int reg = aty_ld_lcd(LCD_MISC_CNTL, par);
- int level;
-
- if (bd->props.power != FB_BLANK_UNBLANK ||
- bd->props.fb_blank != FB_BLANK_UNBLANK)
- level = 0;
- else
- level = bd->props.brightness;
+ int level = backlight_get_brightness(bd);
reg |= (BLMOD_EN | BIASMOD_EN);
if (level > 0) {
@@ -3192,8 +3186,7 @@ static void aty_init_lcd(struct atyfb_par *par, u32 bios_base)
* which we print to the screen.
*/
id = *(u8 *)par->lcd_table;
- strncpy(model, (char *)par->lcd_table+1, 24);
- model[23] = 0;
+ strscpy(model, (char *)par->lcd_table+1, sizeof(model));
width = par->lcd_width = *(u16 *)(par->lcd_table+25);
height = par->lcd_height = *(u16 *)(par->lcd_table+27);
diff --git a/drivers/video/fbdev/aty/radeon_backlight.c b/drivers/video/fbdev/aty/radeon_backlight.c
index d2c1263ad260..427adc838f77 100644
--- a/drivers/video/fbdev/aty/radeon_backlight.c
+++ b/drivers/video/fbdev/aty/radeon_backlight.c
@@ -57,11 +57,7 @@ static int radeon_bl_update_status(struct backlight_device *bd)
* backlight. This provides some greater power saving and the display
* is useless without backlight anyway.
*/
- if (bd->props.power != FB_BLANK_UNBLANK ||
- bd->props.fb_blank != FB_BLANK_UNBLANK)
- level = 0;
- else
- level = bd->props.brightness;
+ level = backlight_get_brightness(bd);
del_timer_sync(&rinfo->lvds_timer);
radeon_engine_idle();
diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c
index c730253ab85c..583cbcf09446 100644
--- a/drivers/video/fbdev/core/fb_defio.c
+++ b/drivers/video/fbdev/core/fb_defio.c
@@ -313,7 +313,7 @@ void fb_deferred_io_open(struct fb_info *info,
}
EXPORT_SYMBOL_GPL(fb_deferred_io_open);
-void fb_deferred_io_cleanup(struct fb_info *info)
+void fb_deferred_io_release(struct fb_info *info)
{
struct fb_deferred_io *fbdefio = info->fbdefio;
struct page *page;
@@ -327,6 +327,14 @@ void fb_deferred_io_cleanup(struct fb_info *info)
page = fb_deferred_io_page(info, i);
page->mapping = NULL;
}
+}
+EXPORT_SYMBOL_GPL(fb_deferred_io_release);
+
+void fb_deferred_io_cleanup(struct fb_info *info)
+{
+ struct fb_deferred_io *fbdefio = info->fbdefio;
+
+ fb_deferred_io_release(info);
kvfree(info->pagerefs);
mutex_destroy(&fbdefio->lock);
diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index 14a7d404062c..1b14c21af2b7 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -2495,9 +2495,12 @@ static int fbcon_set_font(struct vc_data *vc, struct console_font *font,
h > FBCON_SWAP(info->var.rotate, info->var.yres, info->var.xres))
return -EINVAL;
+ if (font->width > 32 || font->height > 32)
+ return -EINVAL;
+
/* Make sure drawing engine can handle the font */
- if (!(info->pixmap.blit_x & (1 << (font->width - 1))) ||
- !(info->pixmap.blit_y & (1 << (font->height - 1))))
+ if (!(info->pixmap.blit_x & BIT(font->width - 1)) ||
+ !(info->pixmap.blit_y & BIT(font->height - 1)))
return -EINVAL;
/* Make sure driver can handle the font length */
diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
index 3a6c8458eb8d..ab3545a00abc 100644
--- a/drivers/video/fbdev/core/fbmem.c
+++ b/drivers/video/fbdev/core/fbmem.c
@@ -1454,6 +1454,10 @@ __releases(&info->lock)
struct fb_info * const info = file->private_data;
lock_fb_info(info);
+#if IS_ENABLED(CONFIG_FB_DEFERRED_IO)
+ if (info->fbdefio)
+ fb_deferred_io_release(info);
+#endif
if (info->fbops->fb_release)
info->fbops->fb_release(info,1);
module_put(info->fbops->owner);
diff --git a/drivers/video/fbdev/core/fbmon.c b/drivers/video/fbdev/core/fbmon.c
index b0e690f41025..79e5bfbdd34c 100644
--- a/drivers/video/fbdev/core/fbmon.c
+++ b/drivers/video/fbdev/core/fbmon.c
@@ -1050,7 +1050,7 @@ static u32 fb_get_vblank(u32 hfreq)
}
/**
- * fb_get_hblank_by_freq - get horizontal blank time given hfreq
+ * fb_get_hblank_by_hfreq - get horizontal blank time given hfreq
* @hfreq: horizontal freq
* @xres: horizontal resolution in pixels
*
diff --git a/drivers/video/fbdev/matrox/matroxfb_base.c b/drivers/video/fbdev/matrox/matroxfb_base.c
index 0d3cee7ae726..a043a737ea9f 100644
--- a/drivers/video/fbdev/matrox/matroxfb_base.c
+++ b/drivers/video/fbdev/matrox/matroxfb_base.c
@@ -1378,8 +1378,8 @@ static struct video_board vbG200 = {
.lowlevel = &matrox_G100
};
static struct video_board vbG200eW = {
- .maxvram = 0x100000,
- .maxdisplayable = 0x800000,
+ .maxvram = 0x1000000,
+ .maxdisplayable = 0x0800000,
.accelID = FB_ACCEL_MATROX_MGAG200,
.lowlevel = &matrox_G100
};
diff --git a/drivers/video/fbdev/mx3fb.c b/drivers/video/fbdev/mx3fb.c
index b945b68984b9..76771e126d0a 100644
--- a/drivers/video/fbdev/mx3fb.c
+++ b/drivers/video/fbdev/mx3fb.c
@@ -283,12 +283,7 @@ static int mx3fb_bl_get_brightness(struct backlight_device *bl)
static int mx3fb_bl_update_status(struct backlight_device *bl)
{
struct mx3fb_data *fbd = bl_get_data(bl);
- int brightness = bl->props.brightness;
-
- if (bl->props.power != FB_BLANK_UNBLANK)
- brightness = 0;
- if (bl->props.fb_blank != FB_BLANK_UNBLANK)
- brightness = 0;
+ int brightness = backlight_get_brightness(bl);
fbd->backlight_level = (fbd->backlight_level & ~0xFF) | brightness;
diff --git a/drivers/video/fbdev/nvidia/nv_backlight.c b/drivers/video/fbdev/nvidia/nv_backlight.c
index 2ce53529f636..503a7a683855 100644
--- a/drivers/video/fbdev/nvidia/nv_backlight.c
+++ b/drivers/video/fbdev/nvidia/nv_backlight.c
@@ -49,17 +49,11 @@ static int nvidia_bl_update_status(struct backlight_device *bd)
{
struct nvidia_par *par = bl_get_data(bd);
u32 tmp_pcrt, tmp_pmc, fpcontrol;
- int level;
+ int level = backlight_get_brightness(bd);
if (!par->FlatPanel)
return 0;
- if (bd->props.power != FB_BLANK_UNBLANK ||
- bd->props.fb_blank != FB_BLANK_UNBLANK)
- level = 0;
- else
- level = bd->props.brightness;
-
tmp_pmc = NV_RD32(par->PMC, 0x10F0) & 0x0000FFFF;
tmp_pcrt = NV_RD32(par->PCRTC0, 0x081C) & 0xFFFFFFFC;
fpcontrol = NV_RD32(par->PRAMDAC, 0x0848) & 0xCFFFFFCC;
diff --git a/drivers/video/fbdev/nvidia/nvidia.c b/drivers/video/fbdev/nvidia/nvidia.c
index 1960916098d4..e60a276b4855 100644
--- a/drivers/video/fbdev/nvidia/nvidia.c
+++ b/drivers/video/fbdev/nvidia/nvidia.c
@@ -1197,17 +1197,17 @@ static int nvidia_set_fbinfo(struct fb_info *info)
return nvidiafb_check_var(&info->var, info);
}
-static u32 nvidia_get_chipset(struct fb_info *info)
+static u32 nvidia_get_chipset(struct pci_dev *pci_dev,
+ volatile u32 __iomem *REGS)
{
- struct nvidia_par *par = info->par;
- u32 id = (par->pci_dev->vendor << 16) | par->pci_dev->device;
+ u32 id = (pci_dev->vendor << 16) | pci_dev->device;
printk(KERN_INFO PFX "Device ID: %x \n", id);
if ((id & 0xfff0) == 0x00f0 ||
(id & 0xfff0) == 0x02e0) {
/* pci-e */
- id = NV_RD32(par->REGS, 0x1800);
+ id = NV_RD32(REGS, 0x1800);
if ((id & 0x0000ffff) == 0x000010DE)
id = 0x10DE0000 | (id >> 16);
@@ -1220,12 +1220,11 @@ static u32 nvidia_get_chipset(struct fb_info *info)
return id;
}
-static u32 nvidia_get_arch(struct fb_info *info)
+static u32 nvidia_get_arch(u32 Chipset)
{
- struct nvidia_par *par = info->par;
u32 arch = 0;
- switch (par->Chipset & 0x0ff0) {
+ switch (Chipset & 0x0ff0) {
case 0x0100: /* GeForce 256 */
case 0x0110: /* GeForce2 MX */
case 0x0150: /* GeForce2 */
@@ -1278,16 +1277,44 @@ static int nvidiafb_probe(struct pci_dev *pd, const struct pci_device_id *ent)
struct fb_info *info;
unsigned short cmd;
int ret;
+ volatile u32 __iomem *REGS;
+ int Chipset;
+ u32 Architecture;
NVTRACE_ENTER();
assert(pd != NULL);
+ if (pci_enable_device(pd)) {
+ printk(KERN_ERR PFX "cannot enable PCI device\n");
+ return -ENODEV;
+ }
+
+ /* enable IO and mem if not already done */
+ pci_read_config_word(pd, PCI_COMMAND, &cmd);
+ cmd |= (PCI_COMMAND_IO | PCI_COMMAND_MEMORY);
+ pci_write_config_word(pd, PCI_COMMAND, cmd);
+
+ nvidiafb_fix.mmio_start = pci_resource_start(pd, 0);
+ nvidiafb_fix.mmio_len = pci_resource_len(pd, 0);
+
+ REGS = ioremap(nvidiafb_fix.mmio_start, nvidiafb_fix.mmio_len);
+ if (!REGS) {
+ printk(KERN_ERR PFX "cannot ioremap MMIO base\n");
+ return -ENODEV;
+ }
+
+ Chipset = nvidia_get_chipset(pd, REGS);
+ Architecture = nvidia_get_arch(Chipset);
+ if (Architecture == 0) {
+ printk(KERN_ERR PFX "unknown NV_ARCH\n");
+ goto err_out;
+ }
+
ret = aperture_remove_conflicting_pci_devices(pd, "nvidiafb");
if (ret)
- return ret;
+ goto err_out;
info = framebuffer_alloc(sizeof(struct nvidia_par), &pd->dev);
-
if (!info)
goto err_out;
@@ -1298,11 +1325,6 @@ static int nvidiafb_probe(struct pci_dev *pd, const struct pci_device_id *ent)
if (info->pixmap.addr == NULL)
goto err_out_kfree;
- if (pci_enable_device(pd)) {
- printk(KERN_ERR PFX "cannot enable PCI device\n");
- goto err_out_enable;
- }
-
if (pci_request_regions(pd, "nvidiafb")) {
printk(KERN_ERR PFX "cannot request PCI regions\n");
goto err_out_enable;
@@ -1318,34 +1340,17 @@ static int nvidiafb_probe(struct pci_dev *pd, const struct pci_device_id *ent)
par->paneltweak = paneltweak;
par->reverse_i2c = reverse_i2c;
- /* enable IO and mem if not already done */
- pci_read_config_word(pd, PCI_COMMAND, &cmd);
- cmd |= (PCI_COMMAND_IO | PCI_COMMAND_MEMORY);
- pci_write_config_word(pd, PCI_COMMAND, cmd);
-
- nvidiafb_fix.mmio_start = pci_resource_start(pd, 0);
nvidiafb_fix.smem_start = pci_resource_start(pd, 1);
- nvidiafb_fix.mmio_len = pci_resource_len(pd, 0);
-
- par->REGS = ioremap(nvidiafb_fix.mmio_start, nvidiafb_fix.mmio_len);
- if (!par->REGS) {
- printk(KERN_ERR PFX "cannot ioremap MMIO base\n");
- goto err_out_free_base0;
- }
+ par->REGS = REGS;
- par->Chipset = nvidia_get_chipset(info);
- par->Architecture = nvidia_get_arch(info);
-
- if (par->Architecture == 0) {
- printk(KERN_ERR PFX "unknown NV_ARCH\n");
- goto err_out_arch;
- }
+ par->Chipset = Chipset;
+ par->Architecture = Architecture;
sprintf(nvidiafb_fix.id, "NV%x", (pd->device & 0x0ff0) >> 4);
if (NVCommonSetup(info))
- goto err_out_arch;
+ goto err_out_free_base0;
par->FbAddress = nvidiafb_fix.smem_start;
par->FbMapSize = par->RamAmountKBytes * 1024;
@@ -1401,7 +1406,6 @@ static int nvidiafb_probe(struct pci_dev *pd, const struct pci_device_id *ent)
goto err_out_iounmap_fb;
}
-
printk(KERN_INFO PFX
"PCI nVidia %s framebuffer (%dMB @ 0x%lX)\n",
info->fix.id,
@@ -1415,15 +1419,14 @@ err_out_iounmap_fb:
err_out_free_base1:
fb_destroy_modedb(info->monspecs.modedb);
nvidia_delete_i2c_busses(par);
-err_out_arch:
- iounmap(par->REGS);
- err_out_free_base0:
+err_out_free_base0:
pci_release_regions(pd);
err_out_enable:
kfree(info->pixmap.addr);
err_out_kfree:
framebuffer_release(info);
err_out:
+ iounmap(REGS);
return -ENODEV;
}
diff --git a/drivers/video/fbdev/omap/omapfb_main.c b/drivers/video/fbdev/omap/omapfb_main.c
index 17cda5765683..1f3df2055ff0 100644
--- a/drivers/video/fbdev/omap/omapfb_main.c
+++ b/drivers/video/fbdev/omap/omapfb_main.c
@@ -1447,7 +1447,7 @@ static int fbinfo_init(struct omapfb_device *fbdev, struct fb_info *info)
info->fbops = &omapfb_ops;
info->flags = FBINFO_FLAG_DEFAULT;
- strncpy(fix->id, MODULE_NAME, sizeof(fix->id));
+ strscpy(fix->id, MODULE_NAME, sizeof(fix->id));
info->pseudo_palette = fbdev->pseudo_palette;
@@ -1573,8 +1573,7 @@ static int omapfb_find_ctrl(struct omapfb_device *fbdev)
fbdev->ctrl = NULL;
- strncpy(name, conf->lcd.ctrl_name, sizeof(name) - 1);
- name[sizeof(name) - 1] = '\0';
+ strscpy(name, conf->lcd.ctrl_name, sizeof(name));
if (strcmp(name, "internal") == 0) {
fbdev->ctrl = fbdev->int_ctrl;
diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c
index 4fc4b26a8d30..ba94a0a7bd4f 100644
--- a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c
+++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c
@@ -331,13 +331,7 @@ static int dsicm_bl_update_status(struct backlight_device *dev)
struct panel_drv_data *ddata = dev_get_drvdata(&dev->dev);
struct omap_dss_device *in = ddata->in;
int r;
- int level;
-
- if (dev->props.fb_blank == FB_BLANK_UNBLANK &&
- dev->props.power == FB_BLANK_UNBLANK)
- level = dev->props.brightness;
- else
- level = 0;
+ int level = backlight_get_brightness(dev);
dev_dbg(&ddata->pdev->dev, "update brightness to %d\n", level);
diff --git a/drivers/video/fbdev/omap2/omapfb/dss/display-sysfs.c b/drivers/video/fbdev/omap2/omapfb/dss/display-sysfs.c
index bc5a44c2a144..ae937854403b 100644
--- a/drivers/video/fbdev/omap2/omapfb/dss/display-sysfs.c
+++ b/drivers/video/fbdev/omap2/omapfb/dss/display-sysfs.c
@@ -10,6 +10,7 @@
#define DSS_SUBSYS_NAME "DISPLAY"
#include <linux/kernel.h>
+#include <linux/kstrtox.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/sysfs.h>
@@ -36,7 +37,7 @@ static ssize_t display_enabled_store(struct omap_dss_device *dssdev,
int r;
bool enable;
- r = strtobool(buf, &enable);
+ r = kstrtobool(buf, &enable);
if (r)
return r;
@@ -73,7 +74,7 @@ static ssize_t display_tear_store(struct omap_dss_device *dssdev,
if (!dssdev->driver->enable_te || !dssdev->driver->get_te)
return -ENOENT;
- r = strtobool(buf, &te);
+ r = kstrtobool(buf, &te);
if (r)
return r;
@@ -183,7 +184,7 @@ static ssize_t display_mirror_store(struct omap_dss_device *dssdev,
if (!dssdev->driver->set_mirror || !dssdev->driver->get_mirror)
return -ENOENT;
- r = strtobool(buf, &mirror);
+ r = kstrtobool(buf, &mirror);
if (r)
return r;
diff --git a/drivers/video/fbdev/omap2/omapfb/dss/dsi.c b/drivers/video/fbdev/omap2/omapfb/dss/dsi.c
index 54b0f034c2ed..7cddb7b8ae34 100644
--- a/drivers/video/fbdev/omap2/omapfb/dss/dsi.c
+++ b/drivers/video/fbdev/omap2/omapfb/dss/dsi.c
@@ -1536,22 +1536,28 @@ static void dsi_dump_dsidev_irqs(struct platform_device *dsidev,
{
struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
unsigned long flags;
- struct dsi_irq_stats stats;
+ struct dsi_irq_stats *stats;
+
+ stats = kzalloc(sizeof(*stats), GFP_KERNEL);
+ if (!stats) {
+ seq_printf(s, "out of memory\n");
+ return;
+ }
spin_lock_irqsave(&dsi->irq_stats_lock, flags);
- stats = dsi->irq_stats;
+ *stats = dsi->irq_stats;
memset(&dsi->irq_stats, 0, sizeof(dsi->irq_stats));
dsi->irq_stats.last_reset = jiffies;
spin_unlock_irqrestore(&dsi->irq_stats_lock, flags);
seq_printf(s, "period %u ms\n",
- jiffies_to_msecs(jiffies - stats.last_reset));
+ jiffies_to_msecs(jiffies - stats->last_reset));
- seq_printf(s, "irqs %d\n", stats.irq_count);
+ seq_printf(s, "irqs %d\n", stats->irq_count);
#define PIS(x) \
- seq_printf(s, "%-20s %10d\n", #x, stats.dsi_irqs[ffs(DSI_IRQ_##x)-1])
+ seq_printf(s, "%-20s %10d\n", #x, stats->dsi_irqs[ffs(DSI_IRQ_##x)-1])
seq_printf(s, "-- DSI%d interrupts --\n", dsi->module_id + 1);
PIS(VC0);
@@ -1575,10 +1581,10 @@ static void dsi_dump_dsidev_irqs(struct platform_device *dsidev,
#define PIS(x) \
seq_printf(s, "%-20s %10d %10d %10d %10d\n", #x, \
- stats.vc_irqs[0][ffs(DSI_VC_IRQ_##x)-1], \
- stats.vc_irqs[1][ffs(DSI_VC_IRQ_##x)-1], \
- stats.vc_irqs[2][ffs(DSI_VC_IRQ_##x)-1], \
- stats.vc_irqs[3][ffs(DSI_VC_IRQ_##x)-1]);
+ stats->vc_irqs[0][ffs(DSI_VC_IRQ_##x)-1], \
+ stats->vc_irqs[1][ffs(DSI_VC_IRQ_##x)-1], \
+ stats->vc_irqs[2][ffs(DSI_VC_IRQ_##x)-1], \
+ stats->vc_irqs[3][ffs(DSI_VC_IRQ_##x)-1]);
seq_printf(s, "-- VC interrupts --\n");
PIS(CS);
@@ -1594,7 +1600,7 @@ static void dsi_dump_dsidev_irqs(struct platform_device *dsidev,
#define PIS(x) \
seq_printf(s, "%-20s %10d\n", #x, \
- stats.cio_irqs[ffs(DSI_CIO_IRQ_##x)-1]);
+ stats->cio_irqs[ffs(DSI_CIO_IRQ_##x)-1]);
seq_printf(s, "-- CIO interrupts --\n");
PIS(ERRSYNCESC1);
@@ -1618,6 +1624,8 @@ static void dsi_dump_dsidev_irqs(struct platform_device *dsidev,
PIS(ULPSACTIVENOT_ALL0);
PIS(ULPSACTIVENOT_ALL1);
#undef PIS
+
+ kfree(stats);
}
static void dsi1_dump_irqs(struct seq_file *s)
diff --git a/drivers/video/fbdev/omap2/omapfb/dss/manager-sysfs.c b/drivers/video/fbdev/omap2/omapfb/dss/manager-sysfs.c
index ba21c4a2633d..1b644be5fe2e 100644
--- a/drivers/video/fbdev/omap2/omapfb/dss/manager-sysfs.c
+++ b/drivers/video/fbdev/omap2/omapfb/dss/manager-sysfs.c
@@ -10,6 +10,7 @@
#define DSS_SUBSYS_NAME "MANAGER"
#include <linux/kernel.h>
+#include <linux/kstrtox.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/platform_device.h>
@@ -246,7 +247,7 @@ static ssize_t manager_trans_key_enabled_store(struct omap_overlay_manager *mgr,
bool enable;
int r;
- r = strtobool(buf, &enable);
+ r = kstrtobool(buf, &enable);
if (r)
return r;
@@ -290,7 +291,7 @@ static ssize_t manager_alpha_blending_enabled_store(
if(!dss_has_feature(FEAT_ALPHA_FIXED_ZORDER))
return -ENODEV;
- r = strtobool(buf, &enable);
+ r = kstrtobool(buf, &enable);
if (r)
return r;
@@ -329,7 +330,7 @@ static ssize_t manager_cpr_enable_store(struct omap_overlay_manager *mgr,
if (!dss_has_feature(FEAT_CPR))
return -ENODEV;
- r = strtobool(buf, &enable);
+ r = kstrtobool(buf, &enable);
if (r)
return r;
diff --git a/drivers/video/fbdev/omap2/omapfb/dss/overlay-sysfs.c b/drivers/video/fbdev/omap2/omapfb/dss/overlay-sysfs.c
index 601c0beb6de9..1da4fb1c77b4 100644
--- a/drivers/video/fbdev/omap2/omapfb/dss/overlay-sysfs.c
+++ b/drivers/video/fbdev/omap2/omapfb/dss/overlay-sysfs.c
@@ -13,6 +13,7 @@
#include <linux/err.h>
#include <linux/sysfs.h>
#include <linux/kobject.h>
+#include <linux/kstrtox.h>
#include <linux/platform_device.h>
#include <video/omapfb_dss.h>
@@ -210,7 +211,7 @@ static ssize_t overlay_enabled_store(struct omap_overlay *ovl, const char *buf,
int r;
bool enable;
- r = strtobool(buf, &enable);
+ r = kstrtobool(buf, &enable);
if (r)
return r;
diff --git a/drivers/video/fbdev/omap2/omapfb/omapfb-sysfs.c b/drivers/video/fbdev/omap2/omapfb/omapfb-sysfs.c
index 06dc41aa0354..831b2c2fbdf9 100644
--- a/drivers/video/fbdev/omap2/omapfb/omapfb-sysfs.c
+++ b/drivers/video/fbdev/omap2/omapfb/omapfb-sysfs.c
@@ -15,6 +15,7 @@
#include <linux/uaccess.h>
#include <linux/platform_device.h>
#include <linux/kernel.h>
+#include <linux/kstrtox.h>
#include <linux/mm.h>
#include <linux/omapfb.h>
@@ -96,7 +97,7 @@ static ssize_t store_mirror(struct device *dev,
int r;
struct fb_var_screeninfo new_var;
- r = strtobool(buf, &mirror);
+ r = kstrtobool(buf, &mirror);
if (r)
return r;
diff --git a/drivers/video/fbdev/riva/fbdev.c b/drivers/video/fbdev/riva/fbdev.c
index 644278146d3b..41edc6e79460 100644
--- a/drivers/video/fbdev/riva/fbdev.c
+++ b/drivers/video/fbdev/riva/fbdev.c
@@ -293,13 +293,7 @@ static int riva_bl_update_status(struct backlight_device *bd)
{
struct riva_par *par = bl_get_data(bd);
U032 tmp_pcrt, tmp_pmc;
- int level;
-
- if (bd->props.power != FB_BLANK_UNBLANK ||
- bd->props.fb_blank != FB_BLANK_UNBLANK)
- level = 0;
- else
- level = bd->props.brightness;
+ int level = backlight_get_brightness(bd);
tmp_pmc = NV_RD32(par->riva.PMC, 0x10F0) & 0x0000FFFF;
tmp_pcrt = NV_RD32(par->riva.PCRTC0, 0x081C) & 0xFFFFFFFC;
diff --git a/drivers/video/fbdev/xen-fbfront.c b/drivers/video/fbdev/xen-fbfront.c
index 8752d389e382..d7f3e6281ce4 100644
--- a/drivers/video/fbdev/xen-fbfront.c
+++ b/drivers/video/fbdev/xen-fbfront.c
@@ -67,7 +67,7 @@ MODULE_PARM_DESC(video,
"Video memory size in MB, width, height in pixels (default 2,800,600)");
static void xenfb_make_preferred_console(void);
-static int xenfb_remove(struct xenbus_device *);
+static void xenfb_remove(struct xenbus_device *);
static void xenfb_init_shared_page(struct xenfb_info *, struct fb_info *);
static int xenfb_connect_backend(struct xenbus_device *, struct xenfb_info *);
static void xenfb_disconnect_backend(struct xenfb_info *);
@@ -523,7 +523,7 @@ static int xenfb_resume(struct xenbus_device *dev)
return xenfb_connect_backend(dev, info);
}
-static int xenfb_remove(struct xenbus_device *dev)
+static void xenfb_remove(struct xenbus_device *dev)
{
struct xenfb_info *info = dev_get_drvdata(&dev->dev);
@@ -538,8 +538,6 @@ static int xenfb_remove(struct xenbus_device *dev)
vfree(info->gfns);
vfree(info->fb);
kfree(info);
-
- return 0;
}
static unsigned long vmalloc_to_gfn(void *address)
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 828ced060742..b9a80aedee1b 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -15,7 +15,7 @@ static ssize_t device_show(struct device *_d,
struct device_attribute *attr, char *buf)
{
struct virtio_device *dev = dev_to_virtio(_d);
- return sprintf(buf, "0x%04x\n", dev->id.device);
+ return sysfs_emit(buf, "0x%04x\n", dev->id.device);
}
static DEVICE_ATTR_RO(device);
@@ -23,7 +23,7 @@ static ssize_t vendor_show(struct device *_d,
struct device_attribute *attr, char *buf)
{
struct virtio_device *dev = dev_to_virtio(_d);
- return sprintf(buf, "0x%04x\n", dev->id.vendor);
+ return sysfs_emit(buf, "0x%04x\n", dev->id.vendor);
}
static DEVICE_ATTR_RO(vendor);
@@ -31,7 +31,7 @@ static ssize_t status_show(struct device *_d,
struct device_attribute *attr, char *buf)
{
struct virtio_device *dev = dev_to_virtio(_d);
- return sprintf(buf, "0x%08x\n", dev->config->get_status(dev));
+ return sysfs_emit(buf, "0x%08x\n", dev->config->get_status(dev));
}
static DEVICE_ATTR_RO(status);
@@ -39,7 +39,7 @@ static ssize_t modalias_show(struct device *_d,
struct device_attribute *attr, char *buf)
{
struct virtio_device *dev = dev_to_virtio(_d);
- return sprintf(buf, "virtio:d%08Xv%08X\n",
+ return sysfs_emit(buf, "virtio:d%08Xv%08X\n",
dev->id.device, dev->id.vendor);
}
static DEVICE_ATTR_RO(modalias);
@@ -54,9 +54,9 @@ static ssize_t features_show(struct device *_d,
/* We actually represent this as a bitstring, as it could be
* arbitrary length in future. */
for (i = 0; i < sizeof(dev->features)*8; i++)
- len += sprintf(buf+len, "%c",
+ len += sysfs_emit_at(buf, len, "%c",
__virtio_test_bit(dev, i) ? '1' : '0');
- len += sprintf(buf+len, "\n");
+ len += sysfs_emit_at(buf, len, "\n");
return len;
}
static DEVICE_ATTR_RO(features);
diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
index c3b9f2761849..9e496e288cfa 100644
--- a/drivers/virtio/virtio_pci_modern.c
+++ b/drivers/virtio/virtio_pci_modern.c
@@ -303,14 +303,14 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
int err;
if (index >= vp_modern_get_num_queues(mdev))
- return ERR_PTR(-ENOENT);
+ return ERR_PTR(-EINVAL);
/* Check if queue is either not available or already active. */
num = vp_modern_get_queue_size(mdev, index);
if (!num || vp_modern_get_queue_enable(mdev, index))
return ERR_PTR(-ENOENT);
- if (num & (num - 1)) {
+ if (!is_power_of_2(num)) {
dev_warn(&vp_dev->pci_dev->dev, "bad queue size %u", num);
return ERR_PTR(-EINVAL);
}
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 2e7689bb933b..723c4e29e1d3 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1052,7 +1052,7 @@ static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split,
dma_addr_t dma_addr;
/* We assume num is a power of 2. */
- if (num & (num - 1)) {
+ if (!is_power_of_2(num)) {
dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
return -EINVAL;
}
diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c
index f2ae2e563dc5..4a2ddf730a3a 100644
--- a/drivers/w1/w1.c
+++ b/drivers/w1/w1.c
@@ -1166,6 +1166,8 @@ int w1_process(void *data)
/* remainder if it woke up early */
unsigned long jremain = 0;
+ atomic_inc(&dev->refcnt);
+
for (;;) {
if (!jremain && dev->search_count) {
@@ -1193,8 +1195,10 @@ int w1_process(void *data)
*/
mutex_unlock(&dev->list_mutex);
- if (kthread_should_stop())
+ if (kthread_should_stop()) {
+ __set_current_state(TASK_RUNNING);
break;
+ }
/* Only sleep when the search is active. */
if (dev->search_count) {
diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c
index b3e1792d9c49..3a71c5eb2f83 100644
--- a/drivers/w1/w1_int.c
+++ b/drivers/w1/w1_int.c
@@ -51,10 +51,9 @@ static struct w1_master *w1_alloc_dev(u32 id, int slave_count, int slave_ttl,
dev->search_count = w1_search_count;
dev->enable_pullup = w1_enable_pullup;
- /* 1 for w1_process to decrement
- * 1 for __w1_remove_master_device to decrement
+ /* For __w1_remove_master_device to decrement
*/
- atomic_set(&dev->refcnt, 2);
+ atomic_set(&dev->refcnt, 1);
INIT_LIST_HEAD(&dev->slist);
INIT_LIST_HEAD(&dev->async_list);
diff --git a/drivers/watchdog/diag288_wdt.c b/drivers/watchdog/diag288_wdt.c
index 4cb10877017c..6ca5d9515d85 100644
--- a/drivers/watchdog/diag288_wdt.c
+++ b/drivers/watchdog/diag288_wdt.c
@@ -86,7 +86,7 @@ static int __diag288(unsigned int func, unsigned int timeout,
"1:\n"
EX_TABLE(0b, 1b)
: "+d" (err) : "d"(__func), "d"(__timeout),
- "d"(__action), "d"(__len) : "1", "cc");
+ "d"(__action), "d"(__len) : "1", "cc", "memory");
return err;
}
@@ -268,12 +268,21 @@ static int __init diag288_init(void)
char ebc_begin[] = {
194, 197, 199, 201, 213
};
+ char *ebc_cmd;
watchdog_set_nowayout(&wdt_dev, nowayout_info);
if (MACHINE_IS_VM) {
- if (__diag288_vm(WDT_FUNC_INIT, 15,
- ebc_begin, sizeof(ebc_begin)) != 0) {
+ ebc_cmd = kmalloc(sizeof(ebc_begin), GFP_KERNEL);
+ if (!ebc_cmd) {
+ pr_err("The watchdog cannot be initialized\n");
+ return -ENOMEM;
+ }
+ memcpy(ebc_cmd, ebc_begin, sizeof(ebc_begin));
+ ret = __diag288_vm(WDT_FUNC_INIT, 15,
+ ebc_cmd, sizeof(ebc_begin));
+ kfree(ebc_cmd);
+ if (ret != 0) {
pr_err("The watchdog cannot be initialized\n");
return -EINVAL;
}
diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c
index 28b2a1fa25ab..0d4f8f4f4948 100644
--- a/drivers/xen/pvcalls-back.c
+++ b/drivers/xen/pvcalls-back.c
@@ -1181,9 +1181,8 @@ static void pvcalls_back_changed(struct xenbus_device *dev,
}
}
-static int pvcalls_back_remove(struct xenbus_device *dev)
+static void pvcalls_back_remove(struct xenbus_device *dev)
{
- return 0;
}
static int pvcalls_back_uevent(struct xenbus_device *xdev,
diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index 1826e8e67125..d5d589bda243 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -225,6 +225,8 @@ again:
return IRQ_HANDLED;
}
+static void free_active_ring(struct sock_mapping *map);
+
static void pvcalls_front_free_map(struct pvcalls_bedata *bedata,
struct sock_mapping *map)
{
@@ -240,7 +242,7 @@ static void pvcalls_front_free_map(struct pvcalls_bedata *bedata,
for (i = 0; i < (1 << PVCALLS_RING_ORDER); i++)
gnttab_end_foreign_access(map->active.ring->ref[i], NULL);
gnttab_end_foreign_access(map->active.ref, NULL);
- free_page((unsigned long)map->active.ring);
+ free_active_ring(map);
kfree(map);
}
@@ -1085,7 +1087,7 @@ static const struct xenbus_device_id pvcalls_front_ids[] = {
{ "" }
};
-static int pvcalls_front_remove(struct xenbus_device *dev)
+static void pvcalls_front_remove(struct xenbus_device *dev)
{
struct pvcalls_bedata *bedata;
struct sock_mapping *map = NULL, *n;
@@ -1121,7 +1123,6 @@ static int pvcalls_front_remove(struct xenbus_device *dev)
kfree(bedata->ring.sring);
kfree(bedata);
xenbus_switch_state(dev, XenbusStateClosed);
- return 0;
}
static int pvcalls_front_probe(struct xenbus_device *dev,
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index d171091eec12..b11e401f1b1e 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -716,14 +716,12 @@ out:
return err;
}
-static int xen_pcibk_xenbus_remove(struct xenbus_device *dev)
+static void xen_pcibk_xenbus_remove(struct xenbus_device *dev)
{
struct xen_pcibk_device *pdev = dev_get_drvdata(&dev->dev);
if (pdev != NULL)
free_pdev(pdev);
-
- return 0;
}
static const struct xenbus_device_id xen_pcibk_ids[] = {
diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
index 6106ed93817d..954188b0b858 100644
--- a/drivers/xen/xen-scsiback.c
+++ b/drivers/xen/xen-scsiback.c
@@ -1249,7 +1249,7 @@ static void scsiback_release_translation_entry(struct vscsibk_info *info)
spin_unlock_irqrestore(&info->v2p_lock, flags);
}
-static int scsiback_remove(struct xenbus_device *dev)
+static void scsiback_remove(struct xenbus_device *dev)
{
struct vscsibk_info *info = dev_get_drvdata(&dev->dev);
@@ -1261,8 +1261,6 @@ static int scsiback_remove(struct xenbus_device *dev)
gnttab_page_cache_shrink(&info->free_pages, 0);
dev_set_drvdata(&dev->dev, NULL);
-
- return 0;
}
static int scsiback_probe(struct xenbus_device *dev,
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index c397c51f80d9..eed551d8555f 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -139,7 +139,7 @@ struct posix_acl *v9fs_iop_get_inode_acl(struct inode *inode, int type, bool rcu
}
-struct posix_acl *v9fs_iop_get_acl(struct user_namespace *mnt_userns,
+struct posix_acl *v9fs_iop_get_acl(struct mnt_idmap *idmap,
struct dentry *dentry, int type)
{
struct v9fs_session_info *v9ses;
@@ -151,7 +151,7 @@ struct posix_acl *v9fs_iop_get_acl(struct user_namespace *mnt_userns,
return v9fs_get_cached_acl(d_inode(dentry), type);
}
-int v9fs_iop_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+int v9fs_iop_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type)
{
int retval;
@@ -195,7 +195,7 @@ int v9fs_iop_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
goto err_out;
}
- if (!inode_owner_or_capable(&init_user_ns, inode)) {
+ if (!inode_owner_or_capable(&nop_mnt_idmap, inode)) {
retval = -EPERM;
goto err_out;
}
@@ -206,7 +206,7 @@ int v9fs_iop_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
struct iattr iattr = {};
struct posix_acl *acl_mode = acl;
- retval = posix_acl_update_mode(&init_user_ns, inode,
+ retval = posix_acl_update_mode(&nop_mnt_idmap, inode,
&iattr.ia_mode,
&acl_mode);
if (retval)
@@ -225,7 +225,7 @@ int v9fs_iop_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
* FIXME should we update ctime ?
* What is the following setxattr update the mode ?
*/
- v9fs_vfs_setattr_dotl(&init_user_ns, dentry, &iattr);
+ v9fs_vfs_setattr_dotl(&nop_mnt_idmap, dentry, &iattr);
}
break;
case ACL_TYPE_DEFAULT:
diff --git a/fs/9p/acl.h b/fs/9p/acl.h
index 4c60a2bce5de..333cfcc281da 100644
--- a/fs/9p/acl.h
+++ b/fs/9p/acl.h
@@ -10,9 +10,9 @@
int v9fs_get_acl(struct inode *inode, struct p9_fid *fid);
struct posix_acl *v9fs_iop_get_inode_acl(struct inode *inode, int type,
bool rcu);
-struct posix_acl *v9fs_iop_get_acl(struct user_namespace *mnt_userns,
+struct posix_acl *v9fs_iop_get_acl(struct mnt_idmap *idmap,
struct dentry *dentry, int type);
-int v9fs_iop_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+int v9fs_iop_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type);
int v9fs_acl_chmod(struct inode *inode, struct p9_fid *fid);
int v9fs_set_create_acl(struct inode *inode, struct p9_fid *fid,
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 6acabc2e7dc9..f3f74d197b5d 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -151,7 +151,7 @@ extern struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags);
extern int v9fs_vfs_unlink(struct inode *i, struct dentry *d);
extern int v9fs_vfs_rmdir(struct inode *i, struct dentry *d);
-extern int v9fs_vfs_rename(struct user_namespace *mnt_userns,
+extern int v9fs_vfs_rename(struct mnt_idmap *idmap,
struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags);
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index bc417da7e9c1..75106b9f293d 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -60,7 +60,7 @@ void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat);
int v9fs_uflags2omode(int uflags, int extended);
void v9fs_blank_wstat(struct p9_wstat *wstat);
-int v9fs_vfs_setattr_dotl(struct user_namespace *mnt_userns,
+int v9fs_vfs_setattr_dotl(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *iattr);
int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end,
int datasync);
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index b740017634ef..b6ba22975781 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -9,6 +9,7 @@
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/fs.h>
+#include <linux/filelock.h>
#include <linux/sched.h>
#include <linux/file.h>
#include <linux/stat.h>
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 27a04a226d97..4344e7a7865f 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -260,7 +260,7 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses,
{
int err = 0;
- inode_init_owner(&init_user_ns, inode, NULL, mode);
+ inode_init_owner(&nop_mnt_idmap, inode, NULL, mode);
inode->i_blocks = 0;
inode->i_rdev = rdev;
inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
@@ -672,7 +672,7 @@ error:
/**
* v9fs_vfs_create - VFS hook to create a regular file
- * @mnt_userns: The user namespace of the mount
+ * @idmap: idmap of the mount
* @dir: The parent directory
* @dentry: The name of file to be created
* @mode: The UNIX file mode to set
@@ -684,7 +684,7 @@ error:
*/
static int
-v9fs_vfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+v9fs_vfs_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
@@ -704,14 +704,14 @@ v9fs_vfs_create(struct user_namespace *mnt_userns, struct inode *dir,
/**
* v9fs_vfs_mkdir - VFS mkdir hook to create a directory
- * @mnt_userns: The user namespace of the mount
+ * @idmap: idmap of the mount
* @dir: inode that is being unlinked
* @dentry: dentry that is being unlinked
* @mode: mode for new directory
*
*/
-static int v9fs_vfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int v9fs_vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
int err;
@@ -908,7 +908,7 @@ int v9fs_vfs_rmdir(struct inode *i, struct dentry *d)
/**
* v9fs_vfs_rename - VFS hook to rename an inode
- * @mnt_userns: The user namespace of the mount
+ * @idmap: The idmap of the mount
* @old_dir: old dir inode
* @old_dentry: old dentry
* @new_dir: new dir inode
@@ -918,7 +918,7 @@ int v9fs_vfs_rmdir(struct inode *i, struct dentry *d)
*/
int
-v9fs_vfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+v9fs_vfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags)
{
@@ -1018,7 +1018,7 @@ error:
/**
* v9fs_vfs_getattr - retrieve file metadata
- * @mnt_userns: The user namespace of the mount
+ * @idmap: idmap of the mount
* @path: Object to query
* @stat: metadata structure to populate
* @request_mask: Mask of STATX_xxx flags indicating the caller's interests
@@ -1027,7 +1027,7 @@ error:
*/
static int
-v9fs_vfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+v9fs_vfs_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int flags)
{
struct dentry *dentry = path->dentry;
@@ -1038,7 +1038,7 @@ v9fs_vfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
v9ses = v9fs_dentry2v9ses(dentry);
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
- generic_fillattr(&init_user_ns, d_inode(dentry), stat);
+ generic_fillattr(&nop_mnt_idmap, d_inode(dentry), stat);
return 0;
}
fid = v9fs_fid_lookup(dentry);
@@ -1051,7 +1051,7 @@ v9fs_vfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
return PTR_ERR(st);
v9fs_stat2inode(st, d_inode(dentry), dentry->d_sb, 0);
- generic_fillattr(&init_user_ns, d_inode(dentry), stat);
+ generic_fillattr(&nop_mnt_idmap, d_inode(dentry), stat);
p9stat_free(st);
kfree(st);
@@ -1060,13 +1060,13 @@ v9fs_vfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
/**
* v9fs_vfs_setattr - set file metadata
- * @mnt_userns: The user namespace of the mount
+ * @idmap: idmap of the mount
* @dentry: file whose metadata to set
* @iattr: metadata assignment structure
*
*/
-static int v9fs_vfs_setattr(struct user_namespace *mnt_userns,
+static int v9fs_vfs_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *iattr)
{
int retval, use_dentry = 0;
@@ -1077,7 +1077,7 @@ static int v9fs_vfs_setattr(struct user_namespace *mnt_userns,
struct p9_wstat wstat;
p9_debug(P9_DEBUG_VFS, "\n");
- retval = setattr_prepare(&init_user_ns, dentry, iattr);
+ retval = setattr_prepare(&nop_mnt_idmap, dentry, iattr);
if (retval)
return retval;
@@ -1135,7 +1135,7 @@ static int v9fs_vfs_setattr(struct user_namespace *mnt_userns,
v9fs_invalidate_inode_attr(inode);
- setattr_copy(&init_user_ns, inode, iattr);
+ setattr_copy(&nop_mnt_idmap, inode, iattr);
mark_inode_dirty(inode);
return 0;
}
@@ -1300,7 +1300,7 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
/**
* v9fs_vfs_symlink - helper function to create symlinks
- * @mnt_userns: The user namespace of the mount
+ * @idmap: idmap of the mount
* @dir: directory inode containing symlink
* @dentry: dentry for symlink
* @symname: symlink data
@@ -1310,7 +1310,7 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
*/
static int
-v9fs_vfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+v9fs_vfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *symname)
{
p9_debug(P9_DEBUG_VFS, " %lu,%pd,%s\n",
@@ -1356,7 +1356,7 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
/**
* v9fs_vfs_mknod - create a special file
- * @mnt_userns: The user namespace of the mount
+ * @idmap: idmap of the mount
* @dir: inode destination for new link
* @dentry: dentry for file
* @mode: mode for creation
@@ -1365,7 +1365,7 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
*/
static int
-v9fs_vfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+v9fs_vfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index f806b3f11649..3bed3eb3a0e2 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -30,7 +30,7 @@
#include "acl.h"
static int
-v9fs_vfs_mknod_dotl(struct user_namespace *mnt_userns, struct inode *dir,
+v9fs_vfs_mknod_dotl(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t omode, dev_t rdev);
/**
@@ -211,7 +211,7 @@ int v9fs_open_to_dotl_flags(int flags)
/**
* v9fs_vfs_create_dotl - VFS hook to create files for 9P2000.L protocol.
- * @mnt_userns: The user namespace of the mount
+ * @idmap: The user namespace of the mount
* @dir: directory inode that is being created
* @dentry: dentry that is being deleted
* @omode: create permissions
@@ -219,10 +219,10 @@ int v9fs_open_to_dotl_flags(int flags)
*
*/
static int
-v9fs_vfs_create_dotl(struct user_namespace *mnt_userns, struct inode *dir,
+v9fs_vfs_create_dotl(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t omode, bool excl)
{
- return v9fs_vfs_mknod_dotl(mnt_userns, dir, dentry, omode, 0);
+ return v9fs_vfs_mknod_dotl(idmap, dir, dentry, omode, 0);
}
static int
@@ -356,14 +356,14 @@ out:
/**
* v9fs_vfs_mkdir_dotl - VFS mkdir hook to create a directory
- * @mnt_userns: The user namespace of the mount
+ * @idmap: The idmap of the mount
* @dir: inode that is being unlinked
* @dentry: dentry that is being unlinked
* @omode: mode for new directory
*
*/
-static int v9fs_vfs_mkdir_dotl(struct user_namespace *mnt_userns,
+static int v9fs_vfs_mkdir_dotl(struct mnt_idmap *idmap,
struct inode *dir, struct dentry *dentry,
umode_t omode)
{
@@ -450,7 +450,7 @@ error:
}
static int
-v9fs_vfs_getattr_dotl(struct user_namespace *mnt_userns,
+v9fs_vfs_getattr_dotl(struct mnt_idmap *idmap,
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags)
{
@@ -462,7 +462,7 @@ v9fs_vfs_getattr_dotl(struct user_namespace *mnt_userns,
p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
v9ses = v9fs_dentry2v9ses(dentry);
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
- generic_fillattr(&init_user_ns, d_inode(dentry), stat);
+ generic_fillattr(&nop_mnt_idmap, d_inode(dentry), stat);
return 0;
}
fid = v9fs_fid_lookup(dentry);
@@ -479,7 +479,7 @@ v9fs_vfs_getattr_dotl(struct user_namespace *mnt_userns,
return PTR_ERR(st);
v9fs_stat2inode_dotl(st, d_inode(dentry), 0);
- generic_fillattr(&init_user_ns, d_inode(dentry), stat);
+ generic_fillattr(&nop_mnt_idmap, d_inode(dentry), stat);
/* Change block size to what the server returned */
stat->blksize = st->st_blksize;
@@ -529,13 +529,13 @@ static int v9fs_mapped_iattr_valid(int iattr_valid)
/**
* v9fs_vfs_setattr_dotl - set file metadata
- * @mnt_userns: The user namespace of the mount
+ * @idmap: idmap of the mount
* @dentry: file whose metadata to set
* @iattr: metadata assignment structure
*
*/
-int v9fs_vfs_setattr_dotl(struct user_namespace *mnt_userns,
+int v9fs_vfs_setattr_dotl(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *iattr)
{
int retval, use_dentry = 0;
@@ -548,7 +548,7 @@ int v9fs_vfs_setattr_dotl(struct user_namespace *mnt_userns,
p9_debug(P9_DEBUG_VFS, "\n");
- retval = setattr_prepare(&init_user_ns, dentry, iattr);
+ retval = setattr_prepare(&nop_mnt_idmap, dentry, iattr);
if (retval)
return retval;
@@ -597,7 +597,7 @@ int v9fs_vfs_setattr_dotl(struct user_namespace *mnt_userns,
truncate_setsize(inode, iattr->ia_size);
v9fs_invalidate_inode_attr(inode);
- setattr_copy(&init_user_ns, inode, iattr);
+ setattr_copy(&nop_mnt_idmap, inode, iattr);
mark_inode_dirty(inode);
if (iattr->ia_valid & ATTR_MODE) {
/* We also want to update ACL when we update mode bits */
@@ -687,7 +687,7 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode,
}
static int
-v9fs_vfs_symlink_dotl(struct user_namespace *mnt_userns, struct inode *dir,
+v9fs_vfs_symlink_dotl(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *symname)
{
int err;
@@ -817,7 +817,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
/**
* v9fs_vfs_mknod_dotl - create a special file
- * @mnt_userns: The user namespace of the mount
+ * @idmap: The idmap of the mount
* @dir: inode destination for new link
* @dentry: dentry for file
* @omode: mode for creation
@@ -825,7 +825,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
*
*/
static int
-v9fs_vfs_mknod_dotl(struct user_namespace *mnt_userns, struct inode *dir,
+v9fs_vfs_mknod_dotl(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t omode, dev_t rdev)
{
int err;
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index b6984311e00a..50f7f3f6b55e 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -150,7 +150,7 @@ static int v9fs_xattr_handler_get(const struct xattr_handler *handler,
}
static int v9fs_xattr_handler_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *dentry, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
diff --git a/fs/Makefile b/fs/Makefile
index 4dea17840761..76abc9e055bd 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -16,7 +16,7 @@ obj-y := open.o read_write.o file_table.o super.o \
pnode.o splice.o sync.o utimes.o d_path.o \
stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \
fs_types.o fs_context.o fs_parser.o fsopen.o init.o \
- kernel_read_file.o remap_range.o
+ kernel_read_file.o mnt_idmapping.o remap_range.o
ifeq ($(CONFIG_BLOCK),y)
obj-y += buffer.o direct-io.o mpage.o
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index 06b7c92343ad..223f0283d20f 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -144,7 +144,7 @@ struct adfs_discmap {
/* Inode stuff */
struct inode *adfs_iget(struct super_block *sb, struct object_info *obj);
int adfs_write_inode(struct inode *inode, struct writeback_control *wbc);
-int adfs_notify_change(struct user_namespace *mnt_userns, struct dentry *dentry,
+int adfs_notify_change(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr);
/* map.c */
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index ee22278b0cfc..c3ac613d0975 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -294,7 +294,7 @@ out:
* later.
*/
int
-adfs_notify_change(struct user_namespace *mnt_userns, struct dentry *dentry,
+adfs_notify_change(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
@@ -302,7 +302,7 @@ adfs_notify_change(struct user_namespace *mnt_userns, struct dentry *dentry,
unsigned int ia_valid = attr->ia_valid;
int error;
- error = setattr_prepare(&init_user_ns, dentry, attr);
+ error = setattr_prepare(&nop_mnt_idmap, dentry, attr);
/*
* we can't change the UID or GID of any file -
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index bfa89e131ead..60685ec76d98 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -167,17 +167,17 @@ extern const struct export_operations affs_export_ops;
extern int affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len);
extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int);
extern int affs_unlink(struct inode *dir, struct dentry *dentry);
-extern int affs_create(struct user_namespace *mnt_userns, struct inode *dir,
+extern int affs_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool);
-extern int affs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+extern int affs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode);
extern int affs_rmdir(struct inode *dir, struct dentry *dentry);
extern int affs_link(struct dentry *olddentry, struct inode *dir,
struct dentry *dentry);
-extern int affs_symlink(struct user_namespace *mnt_userns,
+extern int affs_symlink(struct mnt_idmap *idmap,
struct inode *dir, struct dentry *dentry,
const char *symname);
-extern int affs_rename2(struct user_namespace *mnt_userns,
+extern int affs_rename2(struct mnt_idmap *idmap,
struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags);
@@ -185,7 +185,7 @@ extern int affs_rename2(struct user_namespace *mnt_userns,
/* inode.c */
extern struct inode *affs_new_inode(struct inode *dir);
-extern int affs_notify_change(struct user_namespace *mnt_userns,
+extern int affs_notify_change(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *attr);
extern void affs_evict_inode(struct inode *inode);
extern struct inode *affs_iget(struct super_block *sb,
diff --git a/fs/affs/file.c b/fs/affs/file.c
index cefa222f7881..8daeed31e1af 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -880,7 +880,7 @@ affs_truncate(struct inode *inode)
if (inode->i_size > AFFS_I(inode)->mmu_private) {
struct address_space *mapping = inode->i_mapping;
struct page *page;
- void *fsdata;
+ void *fsdata = NULL;
loff_t isize = inode->i_size;
int res;
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 2352a75bd9d6..27f77a52c5c8 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -216,7 +216,7 @@ affs_write_inode(struct inode *inode, struct writeback_control *wbc)
}
int
-affs_notify_change(struct user_namespace *mnt_userns, struct dentry *dentry,
+affs_notify_change(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
@@ -224,7 +224,7 @@ affs_notify_change(struct user_namespace *mnt_userns, struct dentry *dentry,
pr_debug("notify_change(%lu,0x%x)\n", inode->i_ino, attr->ia_valid);
- error = setattr_prepare(&init_user_ns, dentry, attr);
+ error = setattr_prepare(&nop_mnt_idmap, dentry, attr);
if (error)
goto out;
@@ -250,7 +250,7 @@ affs_notify_change(struct user_namespace *mnt_userns, struct dentry *dentry,
affs_truncate(inode);
}
- setattr_copy(&init_user_ns, inode, attr);
+ setattr_copy(&nop_mnt_idmap, inode, attr);
mark_inode_dirty(inode);
if (attr->ia_valid & ATTR_MODE)
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index bcab18956b4f..d12ccfd2a83d 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -242,7 +242,7 @@ affs_unlink(struct inode *dir, struct dentry *dentry)
}
int
-affs_create(struct user_namespace *mnt_userns, struct inode *dir,
+affs_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
struct super_block *sb = dir->i_sb;
@@ -274,7 +274,7 @@ affs_create(struct user_namespace *mnt_userns, struct inode *dir,
}
int
-affs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+affs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
struct inode *inode;
@@ -313,7 +313,7 @@ affs_rmdir(struct inode *dir, struct dentry *dentry)
}
int
-affs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+affs_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *symname)
{
struct super_block *sb = dir->i_sb;
@@ -503,7 +503,7 @@ done:
return retval;
}
-int affs_rename2(struct user_namespace *mnt_userns, struct inode *old_dir,
+int affs_rename2(struct mnt_idmap *idmap, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags)
{
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 7dcd59693a0c..d4ddb20d6732 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -13,6 +13,8 @@
#include "internal.h"
#include "afs_cm.h"
#include "protocol_yfs.h"
+#define RXRPC_TRACE_ONLY_DEFINE_ENUMS
+#include <trace/events/rxrpc.h>
static int afs_deliver_cb_init_call_back_state(struct afs_call *);
static int afs_deliver_cb_init_call_back_state3(struct afs_call *);
@@ -191,7 +193,7 @@ static void afs_cm_destructor(struct afs_call *call)
* Abort a service call from within an action function.
*/
static void afs_abort_service_call(struct afs_call *call, u32 abort_code, int error,
- const char *why)
+ enum rxrpc_abort_reason why)
{
rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
abort_code, error, why);
@@ -469,7 +471,7 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work)
if (memcmp(r, &call->net->uuid, sizeof(call->net->uuid)) == 0)
afs_send_empty_reply(call);
else
- afs_abort_service_call(call, 1, 1, "K-1");
+ afs_abort_service_call(call, 1, 1, afs_abort_probeuuid_negative);
afs_put_call(call);
_leave("");
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index b7c1f8c84b38..82690d1dd49a 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -28,17 +28,17 @@ static bool afs_lookup_one_filldir(struct dir_context *ctx, const char *name, in
loff_t fpos, u64 ino, unsigned dtype);
static bool afs_lookup_filldir(struct dir_context *ctx, const char *name, int nlen,
loff_t fpos, u64 ino, unsigned dtype);
-static int afs_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int afs_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl);
-static int afs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int afs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode);
static int afs_rmdir(struct inode *dir, struct dentry *dentry);
static int afs_unlink(struct inode *dir, struct dentry *dentry);
static int afs_link(struct dentry *from, struct inode *dir,
struct dentry *dentry);
-static int afs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *content);
-static int afs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+static int afs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags);
static bool afs_dir_release_folio(struct folio *folio, gfp_t gfp_flags);
@@ -1332,7 +1332,7 @@ static const struct afs_operation_ops afs_mkdir_operation = {
/*
* create a directory on an AFS filesystem
*/
-static int afs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int afs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
struct afs_operation *op;
@@ -1630,7 +1630,7 @@ static const struct afs_operation_ops afs_create_operation = {
/*
* create a regular file on an AFS filesystem
*/
-static int afs_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int afs_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
struct afs_operation *op;
@@ -1760,7 +1760,7 @@ static const struct afs_operation_ops afs_symlink_operation = {
/*
* create a symlink in an AFS filesystem
*/
-static int afs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *content)
{
struct afs_operation *op;
@@ -1897,7 +1897,7 @@ static const struct afs_operation_ops afs_rename_operation = {
/*
* rename a file in an AFS filesystem and/or move it between directories
*/
-static int afs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+static int afs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags)
{
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index bbcc5afd1576..9c6dea3139f5 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -451,7 +451,7 @@ static int afs_do_setlk_check(struct afs_vnode *vnode, struct key *key,
*/
static int afs_do_setlk(struct file *file, struct file_lock *fl)
{
- struct inode *inode = locks_inode(file);
+ struct inode *inode = file_inode(file);
struct afs_vnode *vnode = AFS_FS_I(inode);
enum afs_flock_mode mode = AFS_FS_S(inode->i_sb)->flock_mode;
afs_lock_type_t type;
@@ -701,7 +701,7 @@ error:
*/
static int afs_do_unlk(struct file *file, struct file_lock *fl)
{
- struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
+ struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
int ret;
_enter("{%llx:%llu},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
@@ -721,7 +721,7 @@ static int afs_do_unlk(struct file *file, struct file_lock *fl)
*/
static int afs_do_getlk(struct file *file, struct file_lock *fl)
{
- struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
+ struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
struct key *key = afs_file_key(file);
int ret, lock_count;
@@ -763,7 +763,7 @@ error:
*/
int afs_lock(struct file *file, int cmd, struct file_lock *fl)
{
- struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
+ struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
enum afs_flock_operation op;
int ret;
@@ -798,7 +798,7 @@ int afs_lock(struct file *file, int cmd, struct file_lock *fl)
*/
int afs_flock(struct file *file, int cmd, struct file_lock *fl)
{
- struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
+ struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
enum afs_flock_operation op;
int ret;
@@ -843,7 +843,7 @@ int afs_flock(struct file *file, int cmd, struct file_lock *fl)
*/
static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl)
{
- struct afs_vnode *vnode = AFS_FS_I(locks_inode(fl->fl_file));
+ struct afs_vnode *vnode = AFS_FS_I(file_inode(fl->fl_file));
_enter("");
@@ -861,7 +861,7 @@ static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl)
*/
static void afs_fl_release_private(struct file_lock *fl)
{
- struct afs_vnode *vnode = AFS_FS_I(locks_inode(fl->fl_file));
+ struct afs_vnode *vnode = AFS_FS_I(file_inode(fl->fl_file));
_enter("");
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 6d3a3dbe4928..0167e96e5198 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -737,7 +737,7 @@ error_unlock:
/*
* read the attributes of an inode
*/
-int afs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+int afs_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
@@ -761,7 +761,7 @@ int afs_getattr(struct user_namespace *mnt_userns, const struct path *path,
do {
read_seqbegin_or_lock(&vnode->cb_lock, &seq);
- generic_fillattr(&init_user_ns, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, inode, stat);
if (test_bit(AFS_VNODE_SILLY_DELETED, &vnode->flags) &&
stat->nlink > 0)
stat->nlink -= 1;
@@ -870,7 +870,7 @@ static const struct afs_operation_ops afs_setattr_operation = {
/*
* set the attributes of an inode
*/
-int afs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int afs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
const unsigned int supported =
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index fd8567b98e2b..ad8523d0d038 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/ktime.h>
#include <linux/fs.h>
+#include <linux/filelock.h>
#include <linux/pagemap.h>
#include <linux/rxrpc.h>
#include <linux/key.h>
@@ -1170,9 +1171,9 @@ extern struct inode *afs_iget(struct afs_operation *, struct afs_vnode_param *);
extern struct inode *afs_root_iget(struct super_block *, struct key *);
extern bool afs_check_validity(struct afs_vnode *);
extern int afs_validate(struct afs_vnode *, struct key *);
-extern int afs_getattr(struct user_namespace *mnt_userns, const struct path *,
+extern int afs_getattr(struct mnt_idmap *idmap, const struct path *,
struct kstat *, u32, unsigned int);
-extern int afs_setattr(struct user_namespace *mnt_userns, struct dentry *, struct iattr *);
+extern int afs_setattr(struct mnt_idmap *idmap, struct dentry *, struct iattr *);
extern void afs_evict_inode(struct inode *);
extern int afs_drop_inode(struct inode *);
@@ -1387,7 +1388,7 @@ extern void afs_cache_permit(struct afs_vnode *, struct key *, unsigned int,
extern struct key *afs_request_key(struct afs_cell *);
extern struct key *afs_request_key_rcu(struct afs_cell *);
extern int afs_check_permit(struct afs_vnode *, struct key *, afs_access_t *);
-extern int afs_permission(struct user_namespace *, struct inode *, int);
+extern int afs_permission(struct mnt_idmap *, struct inode *, int);
extern void __exit afs_clean_up_permit_cache(void);
/*
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index c62939e5ea1f..7817e2b860e5 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -13,6 +13,8 @@
#include "internal.h"
#include "afs_cm.h"
#include "protocol_yfs.h"
+#define RXRPC_TRACE_ONLY_DEFINE_ENUMS
+#include <trace/events/rxrpc.h>
struct workqueue_struct *afs_async_calls;
@@ -397,7 +399,8 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
error_do_abort:
if (ret != -ECONNABORTED) {
rxrpc_kernel_abort_call(call->net->socket, rxcall,
- RX_USER_ABORT, ret, "KSD");
+ RX_USER_ABORT, ret,
+ afs_abort_send_data_error);
} else {
len = 0;
iov_iter_kvec(&msg.msg_iter, ITER_DEST, NULL, 0, 0);
@@ -527,7 +530,8 @@ static void afs_deliver_to_call(struct afs_call *call)
case -ENOTSUPP:
abort_code = RXGEN_OPCODE;
rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
- abort_code, ret, "KIV");
+ abort_code, ret,
+ afs_abort_op_not_supported);
goto local_abort;
case -EIO:
pr_err("kAFS: Call %u in bad state %u\n",
@@ -542,12 +546,14 @@ static void afs_deliver_to_call(struct afs_call *call)
if (state != AFS_CALL_CL_AWAIT_REPLY)
abort_code = RXGEN_SS_UNMARSHAL;
rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
- abort_code, ret, "KUM");
+ abort_code, ret,
+ afs_abort_unmarshal_error);
goto local_abort;
default:
abort_code = RX_CALL_DEAD;
rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
- abort_code, ret, "KER");
+ abort_code, ret,
+ afs_abort_general_error);
goto local_abort;
}
}
@@ -619,7 +625,8 @@ long afs_wait_for_call_to_complete(struct afs_call *call,
/* Kill off the call if it's still live. */
_debug("call interrupted");
if (rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
- RX_USER_ABORT, -EINTR, "KWI"))
+ RX_USER_ABORT, -EINTR,
+ afs_abort_interrupted))
afs_set_call_complete(call, -EINTR, 0);
}
}
@@ -836,7 +843,8 @@ void afs_send_empty_reply(struct afs_call *call)
case -ENOMEM:
_debug("oom");
rxrpc_kernel_abort_call(net->socket, call->rxcall,
- RXGEN_SS_MARSHAL, -ENOMEM, "KOO");
+ RXGEN_SS_MARSHAL, -ENOMEM,
+ afs_abort_oom);
fallthrough;
default:
_leave(" [error]");
@@ -878,7 +886,8 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
if (n == -ENOMEM) {
_debug("oom");
rxrpc_kernel_abort_call(net->socket, call->rxcall,
- RXGEN_SS_MARSHAL, -ENOMEM, "KOO");
+ RXGEN_SS_MARSHAL, -ENOMEM,
+ afs_abort_oom);
}
_leave(" [error]");
}
@@ -900,6 +909,7 @@ int afs_extract_data(struct afs_call *call, bool want_more)
ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, iter,
&call->iov_len, want_more, &remote_abort,
&call->service_id);
+ trace_afs_receive_data(call, call->iter, want_more, ret);
if (ret == 0 || ret == -EAGAIN)
return ret;
diff --git a/fs/afs/security.c b/fs/afs/security.c
index 7c6a63a30394..6a7744c9e2a2 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -395,7 +395,7 @@ int afs_check_permit(struct afs_vnode *vnode, struct key *key,
* - AFS ACLs are attached to directories only, and a file is controlled by its
* parent directory's ACL
*/
-int afs_permission(struct user_namespace *mnt_userns, struct inode *inode,
+int afs_permission(struct mnt_idmap *idmap, struct inode *inode,
int mask)
{
struct afs_vnode *vnode = AFS_FS_I(inode);
diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c
index 7751b0b3f81d..9048d8ccc715 100644
--- a/fs/afs/xattr.c
+++ b/fs/afs/xattr.c
@@ -97,7 +97,7 @@ static const struct afs_operation_ops afs_store_acl_operation = {
* Set a file's AFS3 ACL.
*/
static int afs_xattr_set_acl(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *dentry,
struct inode *inode, const char *name,
const void *buffer, size_t size, int flags)
@@ -228,7 +228,7 @@ static const struct afs_operation_ops yfs_store_opaque_acl2_operation = {
* Set a file's YFS ACL.
*/
static int afs_xattr_set_yfs(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *dentry,
struct inode *inode, const char *name,
const void *buffer, size_t size, int flags)
diff --git a/fs/aio.c b/fs/aio.c
index 562916d85cba..e85ba0b77f59 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -361,6 +361,9 @@ static int aio_ring_mremap(struct vm_area_struct *vma)
spin_lock(&mm->ioctx_lock);
rcu_read_lock();
table = rcu_dereference(mm->ioctx_table);
+ if (!table)
+ goto out_unlock;
+
for (i = 0; i < table->nr; i++) {
struct kioctx *ctx;
@@ -374,6 +377,7 @@ static int aio_ring_mremap(struct vm_area_struct *vma)
}
}
+out_unlock:
rcu_read_unlock();
spin_unlock(&mm->ioctx_lock);
return res;
diff --git a/fs/attr.c b/fs/attr.c
index b45f30e516fa..aca9ff7aed33 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -14,6 +14,7 @@
#include <linux/capability.h>
#include <linux/fsnotify.h>
#include <linux/fcntl.h>
+#include <linux/filelock.h>
#include <linux/security.h>
#include <linux/evm.h>
#include <linux/ima.h>
@@ -23,7 +24,7 @@
/**
* setattr_should_drop_sgid - determine whether the setgid bit needs to be
* removed
- * @mnt_userns: user namespace of the mount @inode was found from
+ * @idmap: idmap of the mount @inode was found from
* @inode: inode to check
*
* This function determines whether the setgid bit needs to be removed.
@@ -33,7 +34,7 @@
*
* Return: ATTR_KILL_SGID if setgid bit needs to be removed, 0 otherwise.
*/
-int setattr_should_drop_sgid(struct user_namespace *mnt_userns,
+int setattr_should_drop_sgid(struct mnt_idmap *idmap,
const struct inode *inode)
{
umode_t mode = inode->i_mode;
@@ -42,8 +43,7 @@ int setattr_should_drop_sgid(struct user_namespace *mnt_userns,
return 0;
if (mode & S_IXGRP)
return ATTR_KILL_SGID;
- if (!in_group_or_capable(mnt_userns, inode,
- i_gid_into_vfsgid(mnt_userns, inode)))
+ if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode)))
return ATTR_KILL_SGID;
return 0;
}
@@ -51,7 +51,7 @@ int setattr_should_drop_sgid(struct user_namespace *mnt_userns,
/**
* setattr_should_drop_suidgid - determine whether the set{g,u}id bit needs to
* be dropped
- * @mnt_userns: user namespace of the mount @inode was found from
+ * @idmap: idmap of the mount @inode was found from
* @inode: inode to check
*
* This function determines whether the set{g,u}id bits need to be removed.
@@ -63,7 +63,7 @@ int setattr_should_drop_sgid(struct user_namespace *mnt_userns,
* Return: A mask of ATTR_KILL_S{G,U}ID indicating which - if any - setid bits
* to remove, 0 otherwise.
*/
-int setattr_should_drop_suidgid(struct user_namespace *mnt_userns,
+int setattr_should_drop_suidgid(struct mnt_idmap *idmap,
struct inode *inode)
{
umode_t mode = inode->i_mode;
@@ -73,7 +73,7 @@ int setattr_should_drop_suidgid(struct user_namespace *mnt_userns,
if (unlikely(mode & S_ISUID))
kill = ATTR_KILL_SUID;
- kill |= setattr_should_drop_sgid(mnt_userns, inode);
+ kill |= setattr_should_drop_sgid(idmap, inode);
if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode)))
return kill;
@@ -84,24 +84,24 @@ EXPORT_SYMBOL(setattr_should_drop_suidgid);
/**
* chown_ok - verify permissions to chown inode
- * @mnt_userns: user namespace of the mount @inode was found from
+ * @idmap: idmap of the mount @inode was found from
* @inode: inode to check permissions on
* @ia_vfsuid: uid to chown @inode to
*
- * If the inode has been found through an idmapped mount the user namespace of
- * the vfsmount must be passed through @mnt_userns. This function will then
- * take care to map the inode according to @mnt_userns before checking
+ * If the inode has been found through an idmapped mount the idmap of
+ * the vfsmount must be passed through @idmap. This function will then
+ * take care to map the inode according to @idmap before checking
* permissions. On non-idmapped mounts or if permission checking is to be
- * performed on the raw inode simply passs init_user_ns.
+ * performed on the raw inode simply pass @nop_mnt_idmap.
*/
-static bool chown_ok(struct user_namespace *mnt_userns,
+static bool chown_ok(struct mnt_idmap *idmap,
const struct inode *inode, vfsuid_t ia_vfsuid)
{
- vfsuid_t vfsuid = i_uid_into_vfsuid(mnt_userns, inode);
+ vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode);
if (vfsuid_eq_kuid(vfsuid, current_fsuid()) &&
vfsuid_eq(ia_vfsuid, vfsuid))
return true;
- if (capable_wrt_inode_uidgid(mnt_userns, inode, CAP_CHOWN))
+ if (capable_wrt_inode_uidgid(idmap, inode, CAP_CHOWN))
return true;
if (!vfsuid_valid(vfsuid) &&
ns_capable(inode->i_sb->s_user_ns, CAP_CHOWN))
@@ -111,28 +111,28 @@ static bool chown_ok(struct user_namespace *mnt_userns,
/**
* chgrp_ok - verify permissions to chgrp inode
- * @mnt_userns: user namespace of the mount @inode was found from
+ * @idmap: idmap of the mount @inode was found from
* @inode: inode to check permissions on
* @ia_vfsgid: gid to chown @inode to
*
- * If the inode has been found through an idmapped mount the user namespace of
- * the vfsmount must be passed through @mnt_userns. This function will then
- * take care to map the inode according to @mnt_userns before checking
+ * If the inode has been found through an idmapped mount the idmap of
+ * the vfsmount must be passed through @idmap. This function will then
+ * take care to map the inode according to @idmap before checking
* permissions. On non-idmapped mounts or if permission checking is to be
- * performed on the raw inode simply passs init_user_ns.
+ * performed on the raw inode simply pass @nop_mnt_idmap.
*/
-static bool chgrp_ok(struct user_namespace *mnt_userns,
+static bool chgrp_ok(struct mnt_idmap *idmap,
const struct inode *inode, vfsgid_t ia_vfsgid)
{
- vfsgid_t vfsgid = i_gid_into_vfsgid(mnt_userns, inode);
- vfsuid_t vfsuid = i_uid_into_vfsuid(mnt_userns, inode);
+ vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode);
+ vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode);
if (vfsuid_eq_kuid(vfsuid, current_fsuid())) {
if (vfsgid_eq(ia_vfsgid, vfsgid))
return true;
if (vfsgid_in_group_p(ia_vfsgid))
return true;
}
- if (capable_wrt_inode_uidgid(mnt_userns, inode, CAP_CHOWN))
+ if (capable_wrt_inode_uidgid(idmap, inode, CAP_CHOWN))
return true;
if (!vfsgid_valid(vfsgid) &&
ns_capable(inode->i_sb->s_user_ns, CAP_CHOWN))
@@ -142,7 +142,7 @@ static bool chgrp_ok(struct user_namespace *mnt_userns,
/**
* setattr_prepare - check if attribute changes to a dentry are allowed
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @dentry: dentry to check
* @attr: attributes to change
*
@@ -152,16 +152,16 @@ static bool chgrp_ok(struct user_namespace *mnt_userns,
* SGID bit from mode if user is not allowed to set it. Also file capabilities
* and IMA extended attributes are cleared if ATTR_KILL_PRIV is set.
*
- * If the inode has been found through an idmapped mount the user namespace of
- * the vfsmount must be passed through @mnt_userns. This function will then
- * take care to map the inode according to @mnt_userns before checking
+ * If the inode has been found through an idmapped mount the idmap of
+ * the vfsmount must be passed through @idmap. This function will then
+ * take care to map the inode according to @idmap before checking
* permissions. On non-idmapped mounts or if permission checking is to be
- * performed on the raw inode simply passs init_user_ns.
+ * performed on the raw inode simply passs @nop_mnt_idmap.
*
* Should be called as the first thing in ->setattr implementations,
* possibly after taking additional locks.
*/
-int setattr_prepare(struct user_namespace *mnt_userns, struct dentry *dentry,
+int setattr_prepare(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
@@ -183,34 +183,34 @@ int setattr_prepare(struct user_namespace *mnt_userns, struct dentry *dentry,
/* Make sure a caller can chown. */
if ((ia_valid & ATTR_UID) &&
- !chown_ok(mnt_userns, inode, attr->ia_vfsuid))
+ !chown_ok(idmap, inode, attr->ia_vfsuid))
return -EPERM;
/* Make sure caller can chgrp. */
if ((ia_valid & ATTR_GID) &&
- !chgrp_ok(mnt_userns, inode, attr->ia_vfsgid))
+ !chgrp_ok(idmap, inode, attr->ia_vfsgid))
return -EPERM;
/* Make sure a caller can chmod. */
if (ia_valid & ATTR_MODE) {
vfsgid_t vfsgid;
- if (!inode_owner_or_capable(mnt_userns, inode))
+ if (!inode_owner_or_capable(idmap, inode))
return -EPERM;
if (ia_valid & ATTR_GID)
vfsgid = attr->ia_vfsgid;
else
- vfsgid = i_gid_into_vfsgid(mnt_userns, inode);
+ vfsgid = i_gid_into_vfsgid(idmap, inode);
/* Also check the setgid bit! */
- if (!in_group_or_capable(mnt_userns, inode, vfsgid))
+ if (!in_group_or_capable(idmap, inode, vfsgid))
attr->ia_mode &= ~S_ISGID;
}
/* Check for setting the inode time. */
if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) {
- if (!inode_owner_or_capable(mnt_userns, inode))
+ if (!inode_owner_or_capable(idmap, inode))
return -EPERM;
}
@@ -219,7 +219,7 @@ kill_priv:
if (ia_valid & ATTR_KILL_PRIV) {
int error;
- error = security_inode_killpriv(mnt_userns, dentry);
+ error = security_inode_killpriv(idmap, dentry);
if (error)
return error;
}
@@ -276,7 +276,7 @@ EXPORT_SYMBOL(inode_newsize_ok);
/**
* setattr_copy - copy simple metadata updates into the generic inode
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @inode: the inode to be updated
* @attr: the new attributes
*
@@ -289,23 +289,23 @@ EXPORT_SYMBOL(inode_newsize_ok);
* Noticeably missing is inode size update, which is more complex
* as it requires pagecache updates.
*
- * If the inode has been found through an idmapped mount the user namespace of
- * the vfsmount must be passed through @mnt_userns. This function will then
- * take care to map the inode according to @mnt_userns before checking
+ * If the inode has been found through an idmapped mount the idmap of
+ * the vfsmount must be passed through @idmap. This function will then
+ * take care to map the inode according to @idmap before checking
* permissions. On non-idmapped mounts or if permission checking is to be
- * performed on the raw inode simply passs init_user_ns.
+ * performed on the raw inode simply pass @nop_mnt_idmap.
*
* The inode is not marked as dirty after this operation. The rationale is
* that for "simple" filesystems, the struct inode is the inode storage.
* The caller is free to mark the inode dirty afterwards if needed.
*/
-void setattr_copy(struct user_namespace *mnt_userns, struct inode *inode,
+void setattr_copy(struct mnt_idmap *idmap, struct inode *inode,
const struct iattr *attr)
{
unsigned int ia_valid = attr->ia_valid;
- i_uid_update(mnt_userns, attr, inode);
- i_gid_update(mnt_userns, attr, inode);
+ i_uid_update(idmap, attr, inode);
+ i_gid_update(idmap, attr, inode);
if (ia_valid & ATTR_ATIME)
inode->i_atime = attr->ia_atime;
if (ia_valid & ATTR_MTIME)
@@ -314,15 +314,15 @@ void setattr_copy(struct user_namespace *mnt_userns, struct inode *inode,
inode->i_ctime = attr->ia_ctime;
if (ia_valid & ATTR_MODE) {
umode_t mode = attr->ia_mode;
- if (!in_group_or_capable(mnt_userns, inode,
- i_gid_into_vfsgid(mnt_userns, inode)))
+ if (!in_group_or_capable(idmap, inode,
+ i_gid_into_vfsgid(idmap, inode)))
mode &= ~S_ISGID;
inode->i_mode = mode;
}
}
EXPORT_SYMBOL(setattr_copy);
-int may_setattr(struct user_namespace *mnt_userns, struct inode *inode,
+int may_setattr(struct mnt_idmap *idmap, struct inode *inode,
unsigned int ia_valid)
{
int error;
@@ -340,8 +340,8 @@ int may_setattr(struct user_namespace *mnt_userns, struct inode *inode,
if (IS_IMMUTABLE(inode))
return -EPERM;
- if (!inode_owner_or_capable(mnt_userns, inode)) {
- error = inode_permission(mnt_userns, inode, MAY_WRITE);
+ if (!inode_owner_or_capable(idmap, inode)) {
+ error = inode_permission(idmap, inode, MAY_WRITE);
if (error)
return error;
}
@@ -352,7 +352,7 @@ EXPORT_SYMBOL(may_setattr);
/**
* notify_change - modify attributes of a filesytem object
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @dentry: object affected
* @attr: new attributes
* @delegated_inode: returns inode, if the inode is delegated
@@ -371,13 +371,13 @@ EXPORT_SYMBOL(may_setattr);
* the file open for write, as there can be no conflicting delegation in
* that case.
*
- * If the inode has been found through an idmapped mount the user namespace of
- * the vfsmount must be passed through @mnt_userns. This function will then
- * take care to map the inode according to @mnt_userns before checking
+ * If the inode has been found through an idmapped mount the idmap of
+ * the vfsmount must be passed through @idmap. This function will then
+ * take care to map the inode according to @idmap before checking
* permissions. On non-idmapped mounts or if permission checking is to be
- * performed on the raw inode simply passs init_user_ns.
+ * performed on the raw inode simply pass @nop_mnt_idmap.
*/
-int notify_change(struct user_namespace *mnt_userns, struct dentry *dentry,
+int notify_change(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr, struct inode **delegated_inode)
{
struct inode *inode = dentry->d_inode;
@@ -388,7 +388,7 @@ int notify_change(struct user_namespace *mnt_userns, struct dentry *dentry,
WARN_ON_ONCE(!inode_is_locked(inode));
- error = may_setattr(mnt_userns, inode, ia_valid);
+ error = may_setattr(idmap, inode, ia_valid);
if (error)
return error;
@@ -453,11 +453,11 @@ int notify_change(struct user_namespace *mnt_userns, struct dentry *dentry,
* namespace of the superblock.
*/
if (ia_valid & ATTR_UID &&
- !vfsuid_has_fsmapping(mnt_userns, inode->i_sb->s_user_ns,
+ !vfsuid_has_fsmapping(idmap, inode->i_sb->s_user_ns,
attr->ia_vfsuid))
return -EOVERFLOW;
if (ia_valid & ATTR_GID &&
- !vfsgid_has_fsmapping(mnt_userns, inode->i_sb->s_user_ns,
+ !vfsgid_has_fsmapping(idmap, inode->i_sb->s_user_ns,
attr->ia_vfsgid))
return -EOVERFLOW;
@@ -465,13 +465,13 @@ int notify_change(struct user_namespace *mnt_userns, struct dentry *dentry,
* gids unless those uids & gids are being made valid.
*/
if (!(ia_valid & ATTR_UID) &&
- !vfsuid_valid(i_uid_into_vfsuid(mnt_userns, inode)))
+ !vfsuid_valid(i_uid_into_vfsuid(idmap, inode)))
return -EOVERFLOW;
if (!(ia_valid & ATTR_GID) &&
- !vfsgid_valid(i_gid_into_vfsgid(mnt_userns, inode)))
+ !vfsgid_valid(i_gid_into_vfsgid(idmap, inode)))
return -EOVERFLOW;
- error = security_inode_setattr(mnt_userns, dentry, attr);
+ error = security_inode_setattr(idmap, dentry, attr);
if (error)
return error;
error = try_break_deleg(inode, delegated_inode);
@@ -479,13 +479,13 @@ int notify_change(struct user_namespace *mnt_userns, struct dentry *dentry,
return error;
if (inode->i_op->setattr)
- error = inode->i_op->setattr(mnt_userns, dentry, attr);
+ error = inode->i_op->setattr(idmap, dentry, attr);
else
- error = simple_setattr(mnt_userns, dentry, attr);
+ error = simple_setattr(idmap, dentry, attr);
if (!error) {
fsnotify_change(dentry, ia_valid);
- ima_inode_post_setattr(mnt_userns, dentry);
+ ima_inode_post_setattr(idmap, dentry);
evm_inode_post_setattr(dentry, ia_valid);
}
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index ca03c1cae2be..6baf90b08e0e 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -10,12 +10,12 @@
#include "autofs_i.h"
-static int autofs_dir_permission(struct user_namespace *, struct inode *, int);
-static int autofs_dir_symlink(struct user_namespace *, struct inode *,
+static int autofs_dir_permission(struct mnt_idmap *, struct inode *, int);
+static int autofs_dir_symlink(struct mnt_idmap *, struct inode *,
struct dentry *, const char *);
static int autofs_dir_unlink(struct inode *, struct dentry *);
static int autofs_dir_rmdir(struct inode *, struct dentry *);
-static int autofs_dir_mkdir(struct user_namespace *, struct inode *,
+static int autofs_dir_mkdir(struct mnt_idmap *, struct inode *,
struct dentry *, umode_t);
static long autofs_root_ioctl(struct file *, unsigned int, unsigned long);
#ifdef CONFIG_COMPAT
@@ -543,7 +543,7 @@ static struct dentry *autofs_lookup(struct inode *dir,
return NULL;
}
-static int autofs_dir_permission(struct user_namespace *mnt_userns,
+static int autofs_dir_permission(struct mnt_idmap *idmap,
struct inode *inode, int mask)
{
if (mask & MAY_WRITE) {
@@ -560,10 +560,10 @@ static int autofs_dir_permission(struct user_namespace *mnt_userns,
return -EACCES;
}
- return generic_permission(mnt_userns, inode, mask);
+ return generic_permission(idmap, inode, mask);
}
-static int autofs_dir_symlink(struct user_namespace *mnt_userns,
+static int autofs_dir_symlink(struct mnt_idmap *idmap,
struct inode *dir, struct dentry *dentry,
const char *symname)
{
@@ -720,7 +720,7 @@ static int autofs_dir_rmdir(struct inode *dir, struct dentry *dentry)
return 0;
}
-static int autofs_dir_mkdir(struct user_namespace *mnt_userns,
+static int autofs_dir_mkdir(struct mnt_idmap *idmap,
struct inode *dir, struct dentry *dentry,
umode_t mode)
{
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 92737166203f..db649487d58c 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -27,7 +27,7 @@ static const struct file_operations bad_file_ops =
.open = bad_file_open,
};
-static int bad_inode_create(struct user_namespace *mnt_userns,
+static int bad_inode_create(struct mnt_idmap *idmap,
struct inode *dir, struct dentry *dentry,
umode_t mode, bool excl)
{
@@ -51,14 +51,14 @@ static int bad_inode_unlink(struct inode *dir, struct dentry *dentry)
return -EIO;
}
-static int bad_inode_symlink(struct user_namespace *mnt_userns,
+static int bad_inode_symlink(struct mnt_idmap *idmap,
struct inode *dir, struct dentry *dentry,
const char *symname)
{
return -EIO;
}
-static int bad_inode_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int bad_inode_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
return -EIO;
@@ -69,13 +69,13 @@ static int bad_inode_rmdir (struct inode *dir, struct dentry *dentry)
return -EIO;
}
-static int bad_inode_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+static int bad_inode_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t rdev)
{
return -EIO;
}
-static int bad_inode_rename2(struct user_namespace *mnt_userns,
+static int bad_inode_rename2(struct mnt_idmap *idmap,
struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
@@ -89,20 +89,20 @@ static int bad_inode_readlink(struct dentry *dentry, char __user *buffer,
return -EIO;
}
-static int bad_inode_permission(struct user_namespace *mnt_userns,
+static int bad_inode_permission(struct mnt_idmap *idmap,
struct inode *inode, int mask)
{
return -EIO;
}
-static int bad_inode_getattr(struct user_namespace *mnt_userns,
+static int bad_inode_getattr(struct mnt_idmap *idmap,
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
return -EIO;
}
-static int bad_inode_setattr(struct user_namespace *mnt_userns,
+static int bad_inode_setattr(struct mnt_idmap *idmap,
struct dentry *direntry, struct iattr *attrs)
{
return -EIO;
@@ -146,14 +146,14 @@ static int bad_inode_atomic_open(struct inode *inode, struct dentry *dentry,
return -EIO;
}
-static int bad_inode_tmpfile(struct user_namespace *mnt_userns,
+static int bad_inode_tmpfile(struct mnt_idmap *idmap,
struct inode *inode, struct file *file,
umode_t mode)
{
return -EIO;
}
-static int bad_inode_set_acl(struct user_namespace *mnt_userns,
+static int bad_inode_set_acl(struct mnt_idmap *idmap,
struct dentry *dentry, struct posix_acl *acl,
int type)
{
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 34d4f68f786b..040d5140e426 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -75,7 +75,7 @@ const struct file_operations bfs_dir_operations = {
.llseek = generic_file_llseek,
};
-static int bfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int bfs_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
int err;
@@ -96,7 +96,7 @@ static int bfs_create(struct user_namespace *mnt_userns, struct inode *dir,
}
set_bit(ino, info->si_imap);
info->si_freei--;
- inode_init_owner(&init_user_ns, inode, dir, mode);
+ inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
inode->i_blocks = 0;
inode->i_op = &bfs_file_inops;
@@ -199,7 +199,7 @@ out_brelse:
return error;
}
-static int bfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+static int bfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags)
{
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index de63572a9404..9a780fafc539 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -2034,7 +2034,7 @@ static int elf_core_dump(struct coredump_params *cprm)
* The number of segs are recored into ELF header as 16bit value.
* Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
*/
- segs = cprm->vma_count + elf_core_extra_phdrs();
+ segs = cprm->vma_count + elf_core_extra_phdrs(cprm);
/* for notes section */
segs++;
@@ -2074,7 +2074,7 @@ static int elf_core_dump(struct coredump_params *cprm)
dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
offset += cprm->vma_data_size;
- offset += elf_core_extra_data_size();
+ offset += elf_core_extra_data_size(cprm);
e_shoff = offset;
if (e_phnum == PN_XNUM) {
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 096e3520a0b1..a05eafcacfb2 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1509,7 +1509,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
tmp->next = thread_list;
thread_list = tmp;
- segs = cprm->vma_count + elf_core_extra_phdrs();
+ segs = cprm->vma_count + elf_core_extra_phdrs(cprm);
/* for notes section */
segs++;
@@ -1555,7 +1555,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
offset += cprm->vma_data_size;
- offset += elf_core_extra_data_size();
+ offset += elf_core_extra_data_size(cprm);
e_shoff = offset;
if (e_phnum == PN_XNUM) {
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 555c962fdad6..90d53209755b 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -11,7 +11,8 @@ condflags := \
$(call cc-option, -Wunused-but-set-variable) \
$(call cc-option, -Wunused-const-variable) \
$(call cc-option, -Wpacked-not-aligned) \
- $(call cc-option, -Wstringop-truncation)
+ $(call cc-option, -Wstringop-truncation) \
+ $(call cc-option, -Wmaybe-uninitialized)
subdir-ccflags-y += $(condflags)
# The following turn off the warnings enabled by -Wextra
subdir-ccflags-y += -Wno-missing-field-initializers
@@ -31,7 +32,8 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \
block-rsv.o delalloc-space.o block-group.o discard.o reflink.o \
- subpage.o tree-mod-log.o extent-io-tree.o fs.o messages.o bio.o
+ subpage.o tree-mod-log.o extent-io-tree.o fs.o messages.o bio.o \
+ lru_cache.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 3da1779e8b79..7427449a04a3 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -110,7 +110,7 @@ out:
return ret;
}
-int btrfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+int btrfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type)
{
int ret;
@@ -118,7 +118,7 @@ int btrfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
umode_t old_mode = inode->i_mode;
if (type == ACL_TYPE_ACCESS && acl) {
- ret = posix_acl_update_mode(mnt_userns, inode,
+ ret = posix_acl_update_mode(idmap, inode,
&inode->i_mode, &acl);
if (ret)
return ret;
diff --git a/fs/btrfs/acl.h b/fs/btrfs/acl.h
index 39bd36e6eeb7..a270e71ec05f 100644
--- a/fs/btrfs/acl.h
+++ b/fs/btrfs/acl.h
@@ -6,7 +6,7 @@
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
struct posix_acl *btrfs_get_acl(struct inode *inode, int type, bool rcu);
-int btrfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+int btrfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type);
int __btrfs_set_acl(struct btrfs_trans_handle *trans, struct inode *inode,
struct posix_acl *acl, int type);
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 21c92c74bf71..90e40d5ceccd 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -484,6 +484,7 @@ static int add_all_parents(struct btrfs_backref_walk_ctx *ctx,
u64 wanted_disk_byte = ref->wanted_disk_byte;
u64 count = 0;
u64 data_offset;
+ u8 type;
if (level != 0) {
eb = path->nodes[level];
@@ -538,6 +539,9 @@ static int add_all_parents(struct btrfs_backref_walk_ctx *ctx,
continue;
}
fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
+ type = btrfs_file_extent_type(eb, fi);
+ if (type == BTRFS_FILE_EXTENT_INLINE)
+ goto next;
disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
data_offset = btrfs_file_extent_offset(eb, fi);
@@ -1248,8 +1252,12 @@ static bool lookup_backref_shared_cache(struct btrfs_backref_share_check_ctx *ct
struct btrfs_root *root,
u64 bytenr, int level, bool *is_shared)
{
+ const struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_backref_shared_cache_entry *entry;
+ if (!current->journal_info)
+ lockdep_assert_held(&fs_info->commit_root_sem);
+
if (!ctx->use_path_cache)
return false;
@@ -1284,7 +1292,7 @@ static bool lookup_backref_shared_cache(struct btrfs_backref_share_check_ctx *ct
* could be a snapshot sharing this extent buffer.
*/
if (entry->is_shared &&
- entry->gen != btrfs_get_last_root_drop_gen(root->fs_info))
+ entry->gen != btrfs_get_last_root_drop_gen(fs_info))
return false;
*is_shared = entry->is_shared;
@@ -1314,9 +1322,13 @@ static void store_backref_shared_cache(struct btrfs_backref_share_check_ctx *ctx
struct btrfs_root *root,
u64 bytenr, int level, bool is_shared)
{
+ const struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_backref_shared_cache_entry *entry;
u64 gen;
+ if (!current->journal_info)
+ lockdep_assert_held(&fs_info->commit_root_sem);
+
if (!ctx->use_path_cache)
return;
@@ -1332,7 +1344,7 @@ static void store_backref_shared_cache(struct btrfs_backref_share_check_ctx *ctx
ASSERT(level >= 0);
if (is_shared)
- gen = btrfs_get_last_root_drop_gen(root->fs_info);
+ gen = btrfs_get_last_root_drop_gen(fs_info);
else
gen = btrfs_root_last_snapshot(&root->root_item);
@@ -1860,6 +1872,8 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
.have_delayed_delete_refs = false,
};
int level;
+ bool leaf_cached;
+ bool leaf_is_shared;
for (int i = 0; i < BTRFS_BACKREF_CTX_PREV_EXTENTS_SIZE; i++) {
if (ctx->prev_extents_cache[i].bytenr == bytenr)
@@ -1881,6 +1895,23 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
walk_ctx.time_seq = elem.seq;
}
+ ctx->use_path_cache = true;
+
+ /*
+ * We may have previously determined that the current leaf is shared.
+ * If it is, then we have a data extent that is shared due to a shared
+ * subtree (caused by snapshotting) and we don't need to check for data
+ * backrefs. If the leaf is not shared, then we must do backref walking
+ * to determine if the data extent is shared through reflinks.
+ */
+ leaf_cached = lookup_backref_shared_cache(ctx, root,
+ ctx->curr_leaf_bytenr, 0,
+ &leaf_is_shared);
+ if (leaf_cached && leaf_is_shared) {
+ ret = 1;
+ goto out_trans;
+ }
+
walk_ctx.ignore_extent_item_pos = true;
walk_ctx.trans = trans;
walk_ctx.fs_info = fs_info;
@@ -1889,7 +1920,6 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
/* -1 means we are in the bytenr of the data extent. */
level = -1;
ULIST_ITER_INIT(&uiter);
- ctx->use_path_cache = true;
while (1) {
bool is_shared;
bool cached;
@@ -1960,6 +1990,7 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
ctx->prev_extents_cache_slot = slot;
}
+out_trans:
if (trans) {
btrfs_put_tree_mod_seq(fs_info, &elem);
btrfs_end_transaction(trans);
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index b8fb7ef6b520..d8b90f95b157 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -14,19 +14,31 @@
#include "dev-replace.h"
#include "rcu-string.h"
#include "zoned.h"
+#include "file-item.h"
static struct bio_set btrfs_bioset;
+static struct bio_set btrfs_clone_bioset;
+static struct bio_set btrfs_repair_bioset;
+static mempool_t btrfs_failed_bio_pool;
+
+struct btrfs_failed_bio {
+ struct btrfs_bio *bbio;
+ int num_copies;
+ atomic_t repair_count;
+};
/*
* Initialize a btrfs_bio structure. This skips the embedded bio itself as it
* is already initialized by the block layer.
*/
-static inline void btrfs_bio_init(struct btrfs_bio *bbio,
- btrfs_bio_end_io_t end_io, void *private)
+void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_inode *inode,
+ btrfs_bio_end_io_t end_io, void *private)
{
memset(bbio, 0, offsetof(struct btrfs_bio, bio));
+ bbio->inode = inode;
bbio->end_io = end_io;
bbio->private = private;
+ atomic_set(&bbio->pending_ios, 1);
}
/*
@@ -37,32 +49,235 @@ static inline void btrfs_bio_init(struct btrfs_bio *bbio,
* a mempool.
*/
struct bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
+ struct btrfs_inode *inode,
btrfs_bio_end_io_t end_io, void *private)
{
struct bio *bio;
bio = bio_alloc_bioset(NULL, nr_vecs, opf, GFP_NOFS, &btrfs_bioset);
- btrfs_bio_init(btrfs_bio(bio), end_io, private);
+ btrfs_bio_init(btrfs_bio(bio), inode, end_io, private);
return bio;
}
-struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size,
- btrfs_bio_end_io_t end_io, void *private)
+static struct bio *btrfs_split_bio(struct btrfs_fs_info *fs_info,
+ struct bio *orig, u64 map_length,
+ bool use_append)
{
+ struct btrfs_bio *orig_bbio = btrfs_bio(orig);
struct bio *bio;
- struct btrfs_bio *bbio;
- ASSERT(offset <= UINT_MAX && size <= UINT_MAX);
+ if (use_append) {
+ unsigned int nr_segs;
+
+ bio = bio_split_rw(orig, &fs_info->limits, &nr_segs,
+ &btrfs_clone_bioset, map_length);
+ } else {
+ bio = bio_split(orig, map_length >> SECTOR_SHIFT, GFP_NOFS,
+ &btrfs_clone_bioset);
+ }
+ btrfs_bio_init(btrfs_bio(bio), orig_bbio->inode, NULL, orig_bbio);
- bio = bio_alloc_clone(orig->bi_bdev, orig, GFP_NOFS, &btrfs_bioset);
- bbio = btrfs_bio(bio);
- btrfs_bio_init(bbio, end_io, private);
+ btrfs_bio(bio)->file_offset = orig_bbio->file_offset;
+ if (!(orig->bi_opf & REQ_BTRFS_ONE_ORDERED))
+ orig_bbio->file_offset += map_length;
- bio_trim(bio, offset >> 9, size >> 9);
- bbio->iter = bio->bi_iter;
+ atomic_inc(&orig_bbio->pending_ios);
return bio;
}
+static void btrfs_orig_write_end_io(struct bio *bio);
+
+static void btrfs_bbio_propagate_error(struct btrfs_bio *bbio,
+ struct btrfs_bio *orig_bbio)
+{
+ /*
+ * For writes we tolerate nr_mirrors - 1 write failures, so we can't
+ * just blindly propagate a write failure here. Instead increment the
+ * error count in the original I/O context so that it is guaranteed to
+ * be larger than the error tolerance.
+ */
+ if (bbio->bio.bi_end_io == &btrfs_orig_write_end_io) {
+ struct btrfs_io_stripe *orig_stripe = orig_bbio->bio.bi_private;
+ struct btrfs_io_context *orig_bioc = orig_stripe->bioc;
+
+ atomic_add(orig_bioc->max_errors, &orig_bioc->error);
+ } else {
+ orig_bbio->bio.bi_status = bbio->bio.bi_status;
+ }
+}
+
+static void btrfs_orig_bbio_end_io(struct btrfs_bio *bbio)
+{
+ if (bbio->bio.bi_pool == &btrfs_clone_bioset) {
+ struct btrfs_bio *orig_bbio = bbio->private;
+
+ if (bbio->bio.bi_status)
+ btrfs_bbio_propagate_error(bbio, orig_bbio);
+ bio_put(&bbio->bio);
+ bbio = orig_bbio;
+ }
+
+ if (atomic_dec_and_test(&bbio->pending_ios))
+ bbio->end_io(bbio);
+}
+
+static int next_repair_mirror(struct btrfs_failed_bio *fbio, int cur_mirror)
+{
+ if (cur_mirror == fbio->num_copies)
+ return cur_mirror + 1 - fbio->num_copies;
+ return cur_mirror + 1;
+}
+
+static int prev_repair_mirror(struct btrfs_failed_bio *fbio, int cur_mirror)
+{
+ if (cur_mirror == 1)
+ return fbio->num_copies;
+ return cur_mirror - 1;
+}
+
+static void btrfs_repair_done(struct btrfs_failed_bio *fbio)
+{
+ if (atomic_dec_and_test(&fbio->repair_count)) {
+ btrfs_orig_bbio_end_io(fbio->bbio);
+ mempool_free(fbio, &btrfs_failed_bio_pool);
+ }
+}
+
+static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio,
+ struct btrfs_device *dev)
+{
+ struct btrfs_failed_bio *fbio = repair_bbio->private;
+ struct btrfs_inode *inode = repair_bbio->inode;
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct bio_vec *bv = bio_first_bvec_all(&repair_bbio->bio);
+ int mirror = repair_bbio->mirror_num;
+
+ if (repair_bbio->bio.bi_status ||
+ !btrfs_data_csum_ok(repair_bbio, dev, 0, bv)) {
+ bio_reset(&repair_bbio->bio, NULL, REQ_OP_READ);
+ repair_bbio->bio.bi_iter = repair_bbio->saved_iter;
+
+ mirror = next_repair_mirror(fbio, mirror);
+ if (mirror == fbio->bbio->mirror_num) {
+ btrfs_debug(fs_info, "no mirror left");
+ fbio->bbio->bio.bi_status = BLK_STS_IOERR;
+ goto done;
+ }
+
+ btrfs_submit_bio(&repair_bbio->bio, mirror);
+ return;
+ }
+
+ do {
+ mirror = prev_repair_mirror(fbio, mirror);
+ btrfs_repair_io_failure(fs_info, btrfs_ino(inode),
+ repair_bbio->file_offset, fs_info->sectorsize,
+ repair_bbio->saved_iter.bi_sector << SECTOR_SHIFT,
+ bv->bv_page, bv->bv_offset, mirror);
+ } while (mirror != fbio->bbio->mirror_num);
+
+done:
+ btrfs_repair_done(fbio);
+ bio_put(&repair_bbio->bio);
+}
+
+/*
+ * Try to kick off a repair read to the next available mirror for a bad sector.
+ *
+ * This primarily tries to recover good data to serve the actual read request,
+ * but also tries to write the good data back to the bad mirror(s) when a
+ * read succeeded to restore the redundancy.
+ */
+static struct btrfs_failed_bio *repair_one_sector(struct btrfs_bio *failed_bbio,
+ u32 bio_offset,
+ struct bio_vec *bv,
+ struct btrfs_failed_bio *fbio)
+{
+ struct btrfs_inode *inode = failed_bbio->inode;
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ const u32 sectorsize = fs_info->sectorsize;
+ const u64 logical = (failed_bbio->saved_iter.bi_sector << SECTOR_SHIFT);
+ struct btrfs_bio *repair_bbio;
+ struct bio *repair_bio;
+ int num_copies;
+ int mirror;
+
+ btrfs_debug(fs_info, "repair read error: read error at %llu",
+ failed_bbio->file_offset + bio_offset);
+
+ num_copies = btrfs_num_copies(fs_info, logical, sectorsize);
+ if (num_copies == 1) {
+ btrfs_debug(fs_info, "no copy to repair from");
+ failed_bbio->bio.bi_status = BLK_STS_IOERR;
+ return fbio;
+ }
+
+ if (!fbio) {
+ fbio = mempool_alloc(&btrfs_failed_bio_pool, GFP_NOFS);
+ fbio->bbio = failed_bbio;
+ fbio->num_copies = num_copies;
+ atomic_set(&fbio->repair_count, 1);
+ }
+
+ atomic_inc(&fbio->repair_count);
+
+ repair_bio = bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_NOFS,
+ &btrfs_repair_bioset);
+ repair_bio->bi_iter.bi_sector = failed_bbio->saved_iter.bi_sector;
+ bio_add_page(repair_bio, bv->bv_page, bv->bv_len, bv->bv_offset);
+
+ repair_bbio = btrfs_bio(repair_bio);
+ btrfs_bio_init(repair_bbio, failed_bbio->inode, NULL, fbio);
+ repair_bbio->file_offset = failed_bbio->file_offset + bio_offset;
+
+ mirror = next_repair_mirror(fbio, failed_bbio->mirror_num);
+ btrfs_debug(fs_info, "submitting repair read to mirror %d", mirror);
+ btrfs_submit_bio(repair_bio, mirror);
+ return fbio;
+}
+
+static void btrfs_check_read_bio(struct btrfs_bio *bbio, struct btrfs_device *dev)
+{
+ struct btrfs_inode *inode = bbio->inode;
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ u32 sectorsize = fs_info->sectorsize;
+ struct bvec_iter *iter = &bbio->saved_iter;
+ blk_status_t status = bbio->bio.bi_status;
+ struct btrfs_failed_bio *fbio = NULL;
+ u32 offset = 0;
+
+ /*
+ * Hand off repair bios to the repair code as there is no upper level
+ * submitter for them.
+ */
+ if (bbio->bio.bi_pool == &btrfs_repair_bioset) {
+ btrfs_end_repair_bio(bbio, dev);
+ return;
+ }
+
+ /* Clear the I/O error. A failed repair will reset it. */
+ bbio->bio.bi_status = BLK_STS_OK;
+
+ while (iter->bi_size) {
+ struct bio_vec bv = bio_iter_iovec(&bbio->bio, *iter);
+
+ bv.bv_len = min(bv.bv_len, sectorsize);
+ if (status || !btrfs_data_csum_ok(bbio, dev, offset, &bv))
+ fbio = repair_one_sector(bbio, offset, &bv, fbio);
+
+ bio_advance_iter_single(&bbio->bio, iter, sectorsize);
+ offset += sectorsize;
+ }
+
+ if (bbio->csum != bbio->csum_inline)
+ kfree(bbio->csum);
+
+ if (fbio)
+ btrfs_repair_done(fbio);
+ else
+ btrfs_orig_bbio_end_io(bbio);
+}
+
static void btrfs_log_dev_io_error(struct bio *bio, struct btrfs_device *dev)
{
if (!dev || !dev->bdev)
@@ -90,24 +305,31 @@ static void btrfs_end_bio_work(struct work_struct *work)
{
struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, end_io_work);
- bbio->end_io(bbio);
+ /* Metadata reads are checked and repaired by the submitter. */
+ if (bbio->bio.bi_opf & REQ_META)
+ bbio->end_io(bbio);
+ else
+ btrfs_check_read_bio(bbio, bbio->bio.bi_private);
}
static void btrfs_simple_end_io(struct bio *bio)
{
- struct btrfs_fs_info *fs_info = bio->bi_private;
struct btrfs_bio *bbio = btrfs_bio(bio);
+ struct btrfs_device *dev = bio->bi_private;
+ struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
btrfs_bio_counter_dec(fs_info);
if (bio->bi_status)
- btrfs_log_dev_io_error(bio, bbio->device);
+ btrfs_log_dev_io_error(bio, dev);
if (bio_op(bio) == REQ_OP_READ) {
INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
queue_work(btrfs_end_io_wq(fs_info, bio), &bbio->end_io_work);
} else {
- bbio->end_io(bbio);
+ if (bio_op(bio) == REQ_OP_ZONE_APPEND)
+ btrfs_record_physical_zoned(bbio);
+ btrfs_orig_bbio_end_io(bbio);
}
}
@@ -118,7 +340,10 @@ static void btrfs_raid56_end_io(struct bio *bio)
btrfs_bio_counter_dec(bioc->fs_info);
bbio->mirror_num = bioc->mirror_num;
- bbio->end_io(bbio);
+ if (bio_op(bio) == REQ_OP_READ && !(bbio->bio.bi_opf & REQ_META))
+ btrfs_check_read_bio(bbio, NULL);
+ else
+ btrfs_orig_bbio_end_io(bbio);
btrfs_put_bioc(bioc);
}
@@ -145,7 +370,7 @@ static void btrfs_orig_write_end_io(struct bio *bio)
else
bio->bi_status = BLK_STS_OK;
- bbio->end_io(bbio);
+ btrfs_orig_bbio_end_io(bbio);
btrfs_put_bioc(bioc);
}
@@ -181,16 +406,10 @@ static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)
*/
if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
u64 physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
+ u64 zone_start = round_down(physical, dev->fs_info->zone_size);
- if (btrfs_dev_is_sequential(dev, physical)) {
- u64 zone_start = round_down(physical,
- dev->fs_info->zone_size);
-
- bio->bi_iter.bi_sector = zone_start >> SECTOR_SHIFT;
- } else {
- bio->bi_opf &= ~REQ_OP_ZONE_APPEND;
- bio->bi_opf |= REQ_OP_WRITE;
- }
+ ASSERT(btrfs_dev_is_sequential(dev, physical));
+ bio->bi_iter.bi_sector = zone_start >> SECTOR_SHIFT;
}
btrfs_debug_in_rcu(dev->fs_info,
"%s: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
@@ -224,41 +443,21 @@ static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr)
btrfs_submit_dev_bio(bioc->stripes[dev_nr].dev, bio);
}
-void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror_num)
+static void __btrfs_submit_bio(struct bio *bio, struct btrfs_io_context *bioc,
+ struct btrfs_io_stripe *smap, int mirror_num)
{
- u64 logical = bio->bi_iter.bi_sector << 9;
- u64 length = bio->bi_iter.bi_size;
- u64 map_length = length;
- struct btrfs_io_context *bioc = NULL;
- struct btrfs_io_stripe smap;
- int ret;
-
- btrfs_bio_counter_inc_blocked(fs_info);
- ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
- &bioc, &smap, &mirror_num, 1);
- if (ret) {
- btrfs_bio_counter_dec(fs_info);
- btrfs_bio_end_io(btrfs_bio(bio), errno_to_blk_status(ret));
- return;
- }
-
- if (map_length < length) {
- btrfs_crit(fs_info,
- "mapping failed logical %llu bio len %llu len %llu",
- logical, length, map_length);
- BUG();
- }
+ /* Do not leak our private flag into the block layer. */
+ bio->bi_opf &= ~REQ_BTRFS_ONE_ORDERED;
if (!bioc) {
- /* Single mirror read/write fast path */
+ /* Single mirror read/write fast path. */
btrfs_bio(bio)->mirror_num = mirror_num;
- btrfs_bio(bio)->device = smap.dev;
- bio->bi_iter.bi_sector = smap.physical >> SECTOR_SHIFT;
- bio->bi_private = fs_info;
+ bio->bi_iter.bi_sector = smap->physical >> SECTOR_SHIFT;
+ bio->bi_private = smap->dev;
bio->bi_end_io = btrfs_simple_end_io;
- btrfs_submit_dev_bio(smap.dev, bio);
+ btrfs_submit_dev_bio(smap->dev, bio);
} else if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
- /* Parity RAID write or read recovery */
+ /* Parity RAID write or read recovery. */
bio->bi_private = bioc;
bio->bi_end_io = btrfs_raid56_end_io;
if (bio_op(bio) == REQ_OP_READ)
@@ -266,16 +465,233 @@ void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror
else
raid56_parity_write(bio, bioc);
} else {
- /* Write to multiple mirrors */
+ /* Write to multiple mirrors. */
int total_devs = bioc->num_stripes;
- int dev_nr;
bioc->orig_bio = bio;
- for (dev_nr = 0; dev_nr < total_devs; dev_nr++)
+ for (int dev_nr = 0; dev_nr < total_devs; dev_nr++)
btrfs_submit_mirrored_bio(bioc, dev_nr);
}
}
+static blk_status_t btrfs_bio_csum(struct btrfs_bio *bbio)
+{
+ if (bbio->bio.bi_opf & REQ_META)
+ return btree_csum_one_bio(bbio);
+ return btrfs_csum_one_bio(bbio);
+}
+
+/*
+ * Async submit bios are used to offload expensive checksumming onto the worker
+ * threads.
+ */
+struct async_submit_bio {
+ struct btrfs_bio *bbio;
+ struct btrfs_io_context *bioc;
+ struct btrfs_io_stripe smap;
+ int mirror_num;
+ struct btrfs_work work;
+};
+
+/*
+ * In order to insert checksums into the metadata in large chunks, we wait
+ * until bio submission time. All the pages in the bio are checksummed and
+ * sums are attached onto the ordered extent record.
+ *
+ * At IO completion time the csums attached on the ordered extent record are
+ * inserted into the btree.
+ */
+static void run_one_async_start(struct btrfs_work *work)
+{
+ struct async_submit_bio *async =
+ container_of(work, struct async_submit_bio, work);
+ blk_status_t ret;
+
+ ret = btrfs_bio_csum(async->bbio);
+ if (ret)
+ async->bbio->bio.bi_status = ret;
+}
+
+/*
+ * In order to insert checksums into the metadata in large chunks, we wait
+ * until bio submission time. All the pages in the bio are checksummed and
+ * sums are attached onto the ordered extent record.
+ *
+ * At IO completion time the csums attached on the ordered extent record are
+ * inserted into the tree.
+ */
+static void run_one_async_done(struct btrfs_work *work)
+{
+ struct async_submit_bio *async =
+ container_of(work, struct async_submit_bio, work);
+ struct bio *bio = &async->bbio->bio;
+
+ /* If an error occurred we just want to clean up the bio and move on. */
+ if (bio->bi_status) {
+ btrfs_orig_bbio_end_io(async->bbio);
+ return;
+ }
+
+ /*
+ * All of the bios that pass through here are from async helpers.
+ * Use REQ_CGROUP_PUNT to issue them from the owning cgroup's context.
+ * This changes nothing when cgroups aren't in use.
+ */
+ bio->bi_opf |= REQ_CGROUP_PUNT;
+ __btrfs_submit_bio(bio, async->bioc, &async->smap, async->mirror_num);
+}
+
+static void run_one_async_free(struct btrfs_work *work)
+{
+ kfree(container_of(work, struct async_submit_bio, work));
+}
+
+static bool should_async_write(struct btrfs_bio *bbio)
+{
+ /*
+ * If the I/O is not issued by fsync and friends, (->sync_writers != 0),
+ * then try to defer the submission to a workqueue to parallelize the
+ * checksum calculation.
+ */
+ if (atomic_read(&bbio->inode->sync_writers))
+ return false;
+
+ /*
+ * Submit metadata writes synchronously if the checksum implementation
+ * is fast, or we are on a zoned device that wants I/O to be submitted
+ * in order.
+ */
+ if (bbio->bio.bi_opf & REQ_META) {
+ struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
+
+ if (btrfs_is_zoned(fs_info))
+ return false;
+ if (test_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags))
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Submit bio to an async queue.
+ *
+ * Return true if the work has been succesfuly submitted, else false.
+ */
+static bool btrfs_wq_submit_bio(struct btrfs_bio *bbio,
+ struct btrfs_io_context *bioc,
+ struct btrfs_io_stripe *smap, int mirror_num)
+{
+ struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
+ struct async_submit_bio *async;
+
+ async = kmalloc(sizeof(*async), GFP_NOFS);
+ if (!async)
+ return false;
+
+ async->bbio = bbio;
+ async->bioc = bioc;
+ async->smap = *smap;
+ async->mirror_num = mirror_num;
+
+ btrfs_init_work(&async->work, run_one_async_start, run_one_async_done,
+ run_one_async_free);
+ if (op_is_sync(bbio->bio.bi_opf))
+ btrfs_queue_work(fs_info->hipri_workers, &async->work);
+ else
+ btrfs_queue_work(fs_info->workers, &async->work);
+ return true;
+}
+
+static bool btrfs_submit_chunk(struct bio *bio, int mirror_num)
+{
+ struct btrfs_bio *bbio = btrfs_bio(bio);
+ struct btrfs_inode *inode = bbio->inode;
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct btrfs_bio *orig_bbio = bbio;
+ u64 logical = bio->bi_iter.bi_sector << 9;
+ u64 length = bio->bi_iter.bi_size;
+ u64 map_length = length;
+ bool use_append = btrfs_use_zone_append(bbio);
+ struct btrfs_io_context *bioc = NULL;
+ struct btrfs_io_stripe smap;
+ blk_status_t ret;
+ int error;
+
+ btrfs_bio_counter_inc_blocked(fs_info);
+ error = __btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
+ &bioc, &smap, &mirror_num, 1);
+ if (error) {
+ ret = errno_to_blk_status(error);
+ goto fail;
+ }
+
+ map_length = min(map_length, length);
+ if (use_append)
+ map_length = min(map_length, fs_info->max_zone_append_size);
+
+ if (map_length < length) {
+ bio = btrfs_split_bio(fs_info, bio, map_length, use_append);
+ bbio = btrfs_bio(bio);
+ }
+
+ /*
+ * Save the iter for the end_io handler and preload the checksums for
+ * data reads.
+ */
+ if (bio_op(bio) == REQ_OP_READ && !(bio->bi_opf & REQ_META)) {
+ bbio->saved_iter = bio->bi_iter;
+ ret = btrfs_lookup_bio_sums(bbio);
+ if (ret)
+ goto fail_put_bio;
+ }
+
+ if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
+ if (use_append) {
+ bio->bi_opf &= ~REQ_OP_WRITE;
+ bio->bi_opf |= REQ_OP_ZONE_APPEND;
+ ret = btrfs_extract_ordered_extent(btrfs_bio(bio));
+ if (ret)
+ goto fail_put_bio;
+ }
+
+ /*
+ * Csum items for reloc roots have already been cloned at this
+ * point, so they are handled as part of the no-checksum case.
+ */
+ if (!(inode->flags & BTRFS_INODE_NODATASUM) &&
+ !test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state) &&
+ !btrfs_is_data_reloc_root(inode->root)) {
+ if (should_async_write(bbio) &&
+ btrfs_wq_submit_bio(bbio, bioc, &smap, mirror_num))
+ goto done;
+
+ ret = btrfs_bio_csum(bbio);
+ if (ret)
+ goto fail_put_bio;
+ }
+ }
+
+ __btrfs_submit_bio(bio, bioc, &smap, mirror_num);
+done:
+ return map_length == length;
+
+fail_put_bio:
+ if (map_length < length)
+ bio_put(bio);
+fail:
+ btrfs_bio_counter_dec(fs_info);
+ btrfs_bio_end_io(orig_bbio, ret);
+ /* Do not submit another chunk */
+ return true;
+}
+
+void btrfs_submit_bio(struct bio *bio, int mirror_num)
+{
+ while (!btrfs_submit_chunk(bio, mirror_num))
+ ;
+}
+
/*
* Submit a repair write.
*
@@ -283,7 +699,7 @@ void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror
* RAID setup. Here we only want to write the one bad copy, so we do the
* mapping ourselves and submit the bio directly.
*
- * The I/O is issued sychronously to block the repair read completion from
+ * The I/O is issued synchronously to block the repair read completion from
* freeing the bio.
*/
int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
@@ -329,7 +745,16 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
&map_length, &bioc, mirror_num);
if (ret)
goto out_counter_dec;
- BUG_ON(mirror_num != bioc->mirror_num);
+ /*
+ * This happens when dev-replace is also running, and the
+ * mirror_num indicates the dev-replace target.
+ *
+ * In this case, we don't need to do anything, as the read
+ * error just means the replace progress hasn't reached our
+ * read range, and later replace routine would handle it well.
+ */
+ if (mirror_num != bioc->mirror_num)
+ goto out_counter_dec;
}
sector = bioc->stripes[bioc->mirror_num - 1].physical >> 9;
@@ -372,10 +797,31 @@ int __init btrfs_bioset_init(void)
offsetof(struct btrfs_bio, bio),
BIOSET_NEED_BVECS))
return -ENOMEM;
+ if (bioset_init(&btrfs_clone_bioset, BIO_POOL_SIZE,
+ offsetof(struct btrfs_bio, bio), 0))
+ goto out_free_bioset;
+ if (bioset_init(&btrfs_repair_bioset, BIO_POOL_SIZE,
+ offsetof(struct btrfs_bio, bio),
+ BIOSET_NEED_BVECS))
+ goto out_free_clone_bioset;
+ if (mempool_init_kmalloc_pool(&btrfs_failed_bio_pool, BIO_POOL_SIZE,
+ sizeof(struct btrfs_failed_bio)))
+ goto out_free_repair_bioset;
return 0;
+
+out_free_repair_bioset:
+ bioset_exit(&btrfs_repair_bioset);
+out_free_clone_bioset:
+ bioset_exit(&btrfs_clone_bioset);
+out_free_bioset:
+ bioset_exit(&btrfs_bioset);
+ return -ENOMEM;
}
void __cold btrfs_bioset_exit(void)
{
+ mempool_exit(&btrfs_failed_bio_pool);
+ bioset_exit(&btrfs_repair_bioset);
+ bioset_exit(&btrfs_clone_bioset);
bioset_exit(&btrfs_bioset);
}
diff --git a/fs/btrfs/bio.h b/fs/btrfs/bio.h
index b12f84b3b341..873ff85817f0 100644
--- a/fs/btrfs/bio.h
+++ b/fs/btrfs/bio.h
@@ -26,32 +26,23 @@ struct btrfs_fs_info;
typedef void (*btrfs_bio_end_io_t)(struct btrfs_bio *bbio);
/*
- * Additional info to pass along bio.
- *
- * Mostly for btrfs specific features like csum and mirror_num.
+ * Highlevel btrfs I/O structure. It is allocated by btrfs_bio_alloc and
+ * passed to btrfs_submit_bio for mapping to the physical devices.
*/
struct btrfs_bio {
- unsigned int mirror_num:7;
-
- /*
- * Extra indicator for metadata bios.
- * For some btrfs bios they use pages without a mapping, thus
- * we can not rely on page->mapping->host to determine if
- * it's a metadata bio.
- */
- unsigned int is_metadata:1;
- struct bvec_iter iter;
-
- /* for direct I/O */
+ /* Inode and offset into it that this I/O operates on. */
+ struct btrfs_inode *inode;
u64 file_offset;
- /* @device is for stripe IO submission. */
- struct btrfs_device *device;
union {
- /* For data checksum verification. */
+ /*
+ * Data checksumming and original I/O information for internal
+ * use in the btrfs_submit_bio machinery.
+ */
struct {
u8 *csum;
u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
+ struct bvec_iter saved_iter;
};
/* For metadata parentness verification. */
@@ -62,7 +53,9 @@ struct btrfs_bio {
btrfs_bio_end_io_t end_io;
void *private;
- /* For read end I/O handling */
+ /* For internal use in read end I/O handling */
+ unsigned int mirror_num;
+ atomic_t pending_ios;
struct work_struct end_io_work;
/*
@@ -80,11 +73,11 @@ static inline struct btrfs_bio *btrfs_bio(struct bio *bio)
int __init btrfs_bioset_init(void);
void __cold btrfs_bioset_exit(void);
+void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_inode *inode,
+ btrfs_bio_end_io_t end_io, void *private);
struct bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
+ struct btrfs_inode *inode,
btrfs_bio_end_io_t end_io, void *private);
-struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size,
- btrfs_bio_end_io_t end_io, void *private);
-
static inline void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
{
@@ -92,34 +85,10 @@ static inline void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
bbio->end_io(bbio);
}
-static inline void btrfs_bio_free_csum(struct btrfs_bio *bbio)
-{
- if (bbio->is_metadata)
- return;
- if (bbio->csum != bbio->csum_inline) {
- kfree(bbio->csum);
- bbio->csum = NULL;
- }
-}
+/* Bio only refers to one ordered extent. */
+#define REQ_BTRFS_ONE_ORDERED REQ_DRV
-/*
- * Iterate through a btrfs_bio (@bbio) on a per-sector basis.
- *
- * bvl - struct bio_vec
- * bbio - struct btrfs_bio
- * iters - struct bvec_iter
- * bio_offset - unsigned int
- */
-#define btrfs_bio_for_each_sector(fs_info, bvl, bbio, iter, bio_offset) \
- for ((iter) = (bbio)->iter, (bio_offset) = 0; \
- (iter).bi_size && \
- (((bvl) = bio_iter_iovec((&(bbio)->bio), (iter))), 1); \
- (bio_offset) += fs_info->sectorsize, \
- bio_advance_iter_single(&(bbio)->bio, &(iter), \
- (fs_info)->sectorsize))
-
-void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
- int mirror_num);
+void btrfs_submit_bio(struct bio *bio, int mirror_num);
int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
u64 length, u64 logical, struct page *page,
unsigned int pg_offset, int mirror_num);
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 708d843daa72..5b10401d803b 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/sizes.h>
#include <linux/list_sort.h>
#include "misc.h"
#include "ctree.h"
@@ -539,6 +540,153 @@ u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end
return total_added;
}
+/*
+ * Get an arbitrary extent item index / max_index through the block group
+ *
+ * @block_group the block group to sample from
+ * @index: the integral step through the block group to grab from
+ * @max_index: the granularity of the sampling
+ * @key: return value parameter for the item we find
+ *
+ * Pre-conditions on indices:
+ * 0 <= index <= max_index
+ * 0 < max_index
+ *
+ * Returns: 0 on success, 1 if the search didn't yield a useful item, negative
+ * error code on error.
+ */
+static int sample_block_group_extent_item(struct btrfs_caching_control *caching_ctl,
+ struct btrfs_block_group *block_group,
+ int index, int max_index,
+ struct btrfs_key *key)
+{
+ struct btrfs_fs_info *fs_info = block_group->fs_info;
+ struct btrfs_root *extent_root;
+ int ret = 0;
+ u64 search_offset;
+ u64 search_end = block_group->start + block_group->length;
+ struct btrfs_path *path;
+
+ ASSERT(index >= 0);
+ ASSERT(index <= max_index);
+ ASSERT(max_index > 0);
+ lockdep_assert_held(&caching_ctl->mutex);
+ lockdep_assert_held_read(&fs_info->commit_root_sem);
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ extent_root = btrfs_extent_root(fs_info, max_t(u64, block_group->start,
+ BTRFS_SUPER_INFO_OFFSET));
+
+ path->skip_locking = 1;
+ path->search_commit_root = 1;
+ path->reada = READA_FORWARD;
+
+ search_offset = index * div_u64(block_group->length, max_index);
+ key->objectid = block_group->start + search_offset;
+ key->type = BTRFS_EXTENT_ITEM_KEY;
+ key->offset = 0;
+
+ while (1) {
+ ret = btrfs_search_forward(extent_root, key, path, 0);
+ if (ret != 0)
+ goto out;
+ /* Success; sampled an extent item in the block group */
+ if (key->type == BTRFS_EXTENT_ITEM_KEY &&
+ key->objectid >= block_group->start &&
+ key->objectid + key->offset <= search_end)
+ goto out;
+
+ /* We can't possibly find a valid extent item anymore */
+ if (key->objectid >= search_end) {
+ ret = 1;
+ break;
+ }
+ if (key->type < BTRFS_EXTENT_ITEM_KEY)
+ key->type = BTRFS_EXTENT_ITEM_KEY;
+ else
+ key->objectid++;
+ btrfs_release_path(path);
+ up_read(&fs_info->commit_root_sem);
+ mutex_unlock(&caching_ctl->mutex);
+ cond_resched();
+ mutex_lock(&caching_ctl->mutex);
+ down_read(&fs_info->commit_root_sem);
+ }
+out:
+ lockdep_assert_held(&caching_ctl->mutex);
+ lockdep_assert_held_read(&fs_info->commit_root_sem);
+ btrfs_free_path(path);
+ return ret;
+}
+
+/*
+ * Best effort attempt to compute a block group's size class while caching it.
+ *
+ * @block_group: the block group we are caching
+ *
+ * We cannot infer the size class while adding free space extents, because that
+ * logic doesn't care about contiguous file extents (it doesn't differentiate
+ * between a 100M extent and 100 contiguous 1M extents). So we need to read the
+ * file extent items. Reading all of them is quite wasteful, because usually
+ * only a handful are enough to give a good answer. Therefore, we just grab 5 of
+ * them at even steps through the block group and pick the smallest size class
+ * we see. Since size class is best effort, and not guaranteed in general,
+ * inaccuracy is acceptable.
+ *
+ * To be more explicit about why this algorithm makes sense:
+ *
+ * If we are caching in a block group from disk, then there are three major cases
+ * to consider:
+ * 1. the block group is well behaved and all extents in it are the same size
+ * class.
+ * 2. the block group is mostly one size class with rare exceptions for last
+ * ditch allocations
+ * 3. the block group was populated before size classes and can have a totally
+ * arbitrary mix of size classes.
+ *
+ * In case 1, looking at any extent in the block group will yield the correct
+ * result. For the mixed cases, taking the minimum size class seems like a good
+ * approximation, since gaps from frees will be usable to the size class. For
+ * 2., a small handful of file extents is likely to yield the right answer. For
+ * 3, we can either read every file extent, or admit that this is best effort
+ * anyway and try to stay fast.
+ *
+ * Returns: 0 on success, negative error code on error.
+ */
+static int load_block_group_size_class(struct btrfs_caching_control *caching_ctl,
+ struct btrfs_block_group *block_group)
+{
+ struct btrfs_key key;
+ int i;
+ u64 min_size = block_group->length;
+ enum btrfs_block_group_size_class size_class = BTRFS_BG_SZ_NONE;
+ int ret;
+
+ if (!btrfs_block_group_should_use_size_class(block_group))
+ return 0;
+
+ for (i = 0; i < 5; ++i) {
+ ret = sample_block_group_extent_item(caching_ctl, block_group, i, 5, &key);
+ if (ret < 0)
+ goto out;
+ if (ret > 0)
+ continue;
+ min_size = min_t(u64, min_size, key.offset);
+ size_class = btrfs_calc_block_group_size_class(min_size);
+ }
+ if (size_class != BTRFS_BG_SZ_NONE) {
+ spin_lock(&block_group->lock);
+ block_group->size_class = size_class;
+ spin_unlock(&block_group->lock);
+ }
+
+out:
+ return ret;
+}
+
static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)
{
struct btrfs_block_group *block_group = caching_ctl->block_group;
@@ -683,6 +831,7 @@ static noinline void caching_thread(struct btrfs_work *work)
mutex_lock(&caching_ctl->mutex);
down_read(&fs_info->commit_root_sem);
+ load_block_group_size_class(caching_ctl, block_group);
if (btrfs_test_opt(fs_info, SPACE_CACHE)) {
ret = load_free_space_cache(block_group);
if (ret == 1) {
@@ -1816,7 +1965,6 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
*
* @fs_info: the filesystem
* @chunk_start: logical address of block group
- * @bdev: physical device to resolve, can be NULL to indicate any device
* @physical: physical address to map to logical addresses
* @logical: return array of logical addresses which map to @physical
* @naddrs: length of @logical
@@ -1827,8 +1975,7 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
* block copies.
*/
int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
- struct block_device *bdev, u64 physical, u64 **logical,
- int *naddrs, int *stripe_len)
+ u64 physical, u64 **logical, int *naddrs, int *stripe_len)
{
struct extent_map *em;
struct map_lookup *map;
@@ -1868,9 +2015,6 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
data_stripe_length))
continue;
- if (bdev && map->stripes[i].dev->bdev != bdev)
- continue;
-
stripe_nr = physical - map->stripes[i].physical;
stripe_nr = div64_u64_rem(stripe_nr, map->stripe_len, &offset);
@@ -1927,7 +2071,7 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
bytenr = btrfs_sb_offset(i);
- ret = btrfs_rmap_block(fs_info, cache->start, NULL,
+ ret = btrfs_rmap_block(fs_info, cache->start,
bytenr, &logical, &nr, &stripe_len);
if (ret)
return ret;
@@ -3330,7 +3474,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&info->delalloc_root_lock);
while (total) {
- bool reclaim;
+ bool reclaim = false;
cache = btrfs_lookup_block_group(info, bytenr);
if (!cache) {
@@ -3379,6 +3523,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
cache->space_info->disk_used -= num_bytes * factor;
reclaim = should_reclaim_block_group(cache, num_bytes);
+
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
@@ -3433,32 +3578,42 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
* reservation and return -EAGAIN, otherwise this function always succeeds.
*/
int btrfs_add_reserved_bytes(struct btrfs_block_group *cache,
- u64 ram_bytes, u64 num_bytes, int delalloc)
+ u64 ram_bytes, u64 num_bytes, int delalloc,
+ bool force_wrong_size_class)
{
struct btrfs_space_info *space_info = cache->space_info;
+ enum btrfs_block_group_size_class size_class;
int ret = 0;
spin_lock(&space_info->lock);
spin_lock(&cache->lock);
if (cache->ro) {
ret = -EAGAIN;
- } else {
- cache->reserved += num_bytes;
- space_info->bytes_reserved += num_bytes;
- trace_btrfs_space_reservation(cache->fs_info, "space_info",
- space_info->flags, num_bytes, 1);
- btrfs_space_info_update_bytes_may_use(cache->fs_info,
- space_info, -ram_bytes);
- if (delalloc)
- cache->delalloc_bytes += num_bytes;
+ goto out;
+ }
- /*
- * Compression can use less space than we reserved, so wake
- * tickets if that happens
- */
- if (num_bytes < ram_bytes)
- btrfs_try_granting_tickets(cache->fs_info, space_info);
+ if (btrfs_block_group_should_use_size_class(cache)) {
+ size_class = btrfs_calc_block_group_size_class(num_bytes);
+ ret = btrfs_use_block_group_size_class(cache, size_class, force_wrong_size_class);
+ if (ret)
+ goto out;
}
+ cache->reserved += num_bytes;
+ space_info->bytes_reserved += num_bytes;
+ trace_btrfs_space_reservation(cache->fs_info, "space_info",
+ space_info->flags, num_bytes, 1);
+ btrfs_space_info_update_bytes_may_use(cache->fs_info,
+ space_info, -ram_bytes);
+ if (delalloc)
+ cache->delalloc_bytes += num_bytes;
+
+ /*
+ * Compression can use less space than we reserved, so wake tickets if
+ * that happens.
+ */
+ if (num_bytes < ram_bytes)
+ btrfs_try_granting_tickets(cache->fs_info, space_info);
+out:
spin_unlock(&cache->lock);
spin_unlock(&space_info->lock);
return ret;
@@ -4218,3 +4373,73 @@ void btrfs_dec_block_group_swap_extents(struct btrfs_block_group *bg, int amount
bg->swap_extents -= amount;
spin_unlock(&bg->lock);
}
+
+enum btrfs_block_group_size_class btrfs_calc_block_group_size_class(u64 size)
+{
+ if (size <= SZ_128K)
+ return BTRFS_BG_SZ_SMALL;
+ if (size <= SZ_8M)
+ return BTRFS_BG_SZ_MEDIUM;
+ return BTRFS_BG_SZ_LARGE;
+}
+
+/*
+ * Handle a block group allocating an extent in a size class
+ *
+ * @bg: The block group we allocated in.
+ * @size_class: The size class of the allocation.
+ * @force_wrong_size_class: Whether we are desperate enough to allow
+ * mismatched size classes.
+ *
+ * Returns: 0 if the size class was valid for this block_group, -EAGAIN in the
+ * case of a race that leads to the wrong size class without
+ * force_wrong_size_class set.
+ *
+ * find_free_extent will skip block groups with a mismatched size class until
+ * it really needs to avoid ENOSPC. In that case it will set
+ * force_wrong_size_class. However, if a block group is newly allocated and
+ * doesn't yet have a size class, then it is possible for two allocations of
+ * different sizes to race and both try to use it. The loser is caught here and
+ * has to retry.
+ */
+int btrfs_use_block_group_size_class(struct btrfs_block_group *bg,
+ enum btrfs_block_group_size_class size_class,
+ bool force_wrong_size_class)
+{
+ ASSERT(size_class != BTRFS_BG_SZ_NONE);
+
+ /* The new allocation is in the right size class, do nothing */
+ if (bg->size_class == size_class)
+ return 0;
+ /*
+ * The new allocation is in a mismatched size class.
+ * This means one of two things:
+ *
+ * 1. Two tasks in find_free_extent for different size_classes raced
+ * and hit the same empty block_group. Make the loser try again.
+ * 2. A call to find_free_extent got desperate enough to set
+ * 'force_wrong_slab'. Don't change the size_class, but allow the
+ * allocation.
+ */
+ if (bg->size_class != BTRFS_BG_SZ_NONE) {
+ if (force_wrong_size_class)
+ return 0;
+ return -EAGAIN;
+ }
+ /*
+ * The happy new block group case: the new allocation is the first
+ * one in the block_group so we set size_class.
+ */
+ bg->size_class = size_class;
+
+ return 0;
+}
+
+bool btrfs_block_group_should_use_size_class(struct btrfs_block_group *bg)
+{
+ if (btrfs_is_zoned(bg->fs_info))
+ return false;
+ if (!btrfs_is_block_group_data_only(bg))
+ return false;
+ return true;
+}
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index a02ea76fd6cf..6e4a0b429ac3 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -12,6 +12,17 @@ enum btrfs_disk_cache_state {
BTRFS_DC_SETUP,
};
+enum btrfs_block_group_size_class {
+ /* Unset */
+ BTRFS_BG_SZ_NONE,
+ /* 0 < size <= 128K */
+ BTRFS_BG_SZ_SMALL,
+ /* 128K < size <= 8M */
+ BTRFS_BG_SZ_MEDIUM,
+ /* 8M < size < BG_LENGTH */
+ BTRFS_BG_SZ_LARGE,
+};
+
/*
* This describes the state of the block_group for async discard. This is due
* to the two pass nature of it where extent discarding is prioritized over
@@ -233,6 +244,7 @@ struct btrfs_block_group {
struct list_head active_bg_list;
struct work_struct zone_finish_work;
struct extent_buffer *last_eb;
+ enum btrfs_block_group_size_class size_class;
};
static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group)
@@ -302,7 +314,8 @@ int btrfs_setup_space_cache(struct btrfs_trans_handle *trans);
int btrfs_update_block_group(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, bool alloc);
int btrfs_add_reserved_bytes(struct btrfs_block_group *cache,
- u64 ram_bytes, u64 num_bytes, int delalloc);
+ u64 ram_bytes, u64 num_bytes, int delalloc,
+ bool force_wrong_size_class);
void btrfs_free_reserved_bytes(struct btrfs_block_group *cache,
u64 num_bytes, int delalloc);
int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
@@ -315,8 +328,7 @@ u64 btrfs_get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags);
void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
int btrfs_free_block_groups(struct btrfs_fs_info *info);
int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
- struct block_device *bdev, u64 physical, u64 **logical,
- int *naddrs, int *stripe_len);
+ u64 physical, u64 **logical, int *naddrs, int *stripe_len);
static inline u64 btrfs_data_alloc_profile(struct btrfs_fs_info *fs_info)
{
@@ -346,4 +358,10 @@ void btrfs_unfreeze_block_group(struct btrfs_block_group *cache);
bool btrfs_inc_block_group_swap_extents(struct btrfs_block_group *bg);
void btrfs_dec_block_group_swap_extents(struct btrfs_block_group *bg, int amount);
+enum btrfs_block_group_size_class btrfs_calc_block_group_size_class(u64 size);
+int btrfs_use_block_group_size_class(struct btrfs_block_group *bg,
+ enum btrfs_block_group_size_class size_class,
+ bool force_wrong_size_class);
+bool btrfs_block_group_should_use_size_class(struct btrfs_block_group *bg);
+
#endif /* BTRFS_BLOCK_GROUP_H */
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 195c09e20609..9dc21622806e 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -93,12 +93,6 @@ struct btrfs_inode {
/* the io_tree does range state (DIRTY, LOCKED etc) */
struct extent_io_tree io_tree;
- /* special utility tree used to record which mirrors have already been
- * tried when checksums fail for a given block
- */
- struct rb_root io_failure_tree;
- spinlock_t io_failure_lock;
-
/*
* Keep track of where the inode has extent items mapped in order to
* make sure the i_size adjustments are accurate
@@ -411,21 +405,11 @@ static inline void btrfs_inode_split_flags(u64 inode_item_flags,
#define CSUM_FMT "0x%*phN"
#define CSUM_FMT_VALUE(size, bytes) size, bytes
-void btrfs_submit_data_write_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num);
-void btrfs_submit_data_read_bio(struct btrfs_inode *inode, struct bio *bio,
- int mirror_num, enum btrfs_compression_type compress_type);
-void btrfs_submit_dio_repair_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num);
-blk_status_t btrfs_submit_bio_start(struct btrfs_inode *inode, struct bio *bio);
-blk_status_t btrfs_submit_bio_start_direct_io(struct btrfs_inode *inode,
- struct bio *bio,
- u64 dio_file_offset);
int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, struct page *page,
u32 pgoff, u8 *csum, const u8 * const csum_expected);
-int btrfs_check_data_csum(struct btrfs_inode *inode, struct btrfs_bio *bbio,
- u32 bio_offset, struct page *page, u32 pgoff);
-unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio,
- u32 bio_offset, struct page *page,
- u64 start, u64 end);
+blk_status_t btrfs_extract_ordered_extent(struct btrfs_bio *bbio);
+bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
+ u32 bio_offset, struct bio_vec *bv);
noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
u64 *orig_start, u64 *orig_block_len,
u64 *ram_bytes, bool nowait, bool strict);
@@ -469,7 +453,7 @@ int btrfs_new_inode_prepare(struct btrfs_new_inode_args *args,
int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
struct btrfs_new_inode_args *args);
void btrfs_new_inode_args_destroy(struct btrfs_new_inode_args *args);
-struct inode *btrfs_new_subvol_inode(struct user_namespace *mnt_userns,
+struct inode *btrfs_new_subvol_inode(struct mnt_idmap *idmap,
struct inode *dir);
void btrfs_set_delalloc_extent(struct btrfs_inode *inode, struct extent_state *state,
u32 bits);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 5122ca79f7ea..f42f31f22d13 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -141,12 +141,15 @@ static int compression_decompress(int type, struct list_head *ws,
static int btrfs_decompress_bio(struct compressed_bio *cb);
-static void finish_compressed_bio_read(struct compressed_bio *cb)
+static void end_compressed_bio_read(struct btrfs_bio *bbio)
{
+ struct compressed_bio *cb = bbio->private;
unsigned int index;
struct page *page;
- if (cb->status == BLK_STS_OK)
+ if (bbio->bio.bi_status)
+ cb->status = bbio->bio.bi_status;
+ else
cb->status = errno_to_blk_status(btrfs_decompress_bio(cb));
/* Release the compressed pages */
@@ -162,54 +165,6 @@ static void finish_compressed_bio_read(struct compressed_bio *cb)
/* Finally free the cb struct */
kfree(cb->compressed_pages);
kfree(cb);
-}
-
-/*
- * Verify the checksums and kick off repair if needed on the uncompressed data
- * before decompressing it into the original bio and freeing the uncompressed
- * pages.
- */
-static void end_compressed_bio_read(struct btrfs_bio *bbio)
-{
- struct compressed_bio *cb = bbio->private;
- struct inode *inode = cb->inode;
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_inode *bi = BTRFS_I(inode);
- bool csum = !(bi->flags & BTRFS_INODE_NODATASUM) &&
- !test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
- blk_status_t status = bbio->bio.bi_status;
- struct bvec_iter iter;
- struct bio_vec bv;
- u32 offset;
-
- btrfs_bio_for_each_sector(fs_info, bv, bbio, iter, offset) {
- u64 start = bbio->file_offset + offset;
-
- if (!status &&
- (!csum || !btrfs_check_data_csum(bi, bbio, offset,
- bv.bv_page, bv.bv_offset))) {
- btrfs_clean_io_failure(bi, start, bv.bv_page,
- bv.bv_offset);
- } else {
- int ret;
-
- refcount_inc(&cb->pending_ios);
- ret = btrfs_repair_one_sector(BTRFS_I(inode), bbio, offset,
- bv.bv_page, bv.bv_offset,
- true);
- if (ret) {
- refcount_dec(&cb->pending_ios);
- status = errno_to_blk_status(ret);
- }
- }
- }
-
- if (status)
- cb->status = status;
-
- if (refcount_dec_and_test(&cb->pending_ios))
- finish_compressed_bio_read(cb);
- btrfs_bio_free_csum(bbio);
bio_put(&bbio->bio);
}
@@ -303,68 +258,12 @@ static void btrfs_finish_compressed_write_work(struct work_struct *work)
static void end_compressed_bio_write(struct btrfs_bio *bbio)
{
struct compressed_bio *cb = bbio->private;
-
- if (bbio->bio.bi_status)
- cb->status = bbio->bio.bi_status;
-
- if (refcount_dec_and_test(&cb->pending_ios)) {
- struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
-
- btrfs_record_physical_zoned(cb->inode, cb->start, &bbio->bio);
- queue_work(fs_info->compressed_write_workers, &cb->write_end_work);
- }
- bio_put(&bbio->bio);
-}
-
-/*
- * Allocate a compressed_bio, which will be used to read/write on-disk
- * (aka, compressed) * data.
- *
- * @cb: The compressed_bio structure, which records all the needed
- * information to bind the compressed data to the uncompressed
- * page cache.
- * @disk_byten: The logical bytenr where the compressed data will be read
- * from or written to.
- * @endio_func: The endio function to call after the IO for compressed data
- * is finished.
- * @next_stripe_start: Return value of logical bytenr of where next stripe starts.
- * Let the caller know to only fill the bio up to the stripe
- * boundary.
- */
-
-
-static struct bio *alloc_compressed_bio(struct compressed_bio *cb, u64 disk_bytenr,
- blk_opf_t opf,
- btrfs_bio_end_io_t endio_func,
- u64 *next_stripe_start)
-{
struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
- struct btrfs_io_geometry geom;
- struct extent_map *em;
- struct bio *bio;
- int ret;
- bio = btrfs_bio_alloc(BIO_MAX_VECS, opf, endio_func, cb);
- bio->bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
+ cb->status = bbio->bio.bi_status;
+ queue_work(fs_info->compressed_write_workers, &cb->write_end_work);
- em = btrfs_get_chunk_map(fs_info, disk_bytenr, fs_info->sectorsize);
- if (IS_ERR(em)) {
- bio_put(bio);
- return ERR_CAST(em);
- }
-
- if (bio_op(bio) == REQ_OP_ZONE_APPEND)
- bio_set_dev(bio, em->map_lookup->stripes[0].dev->bdev);
-
- ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(bio), disk_bytenr, &geom);
- free_extent_map(em);
- if (ret < 0) {
- bio_put(bio);
- return ERR_PTR(ret);
- }
- *next_stripe_start = disk_bytenr + geom.len;
- refcount_inc(&cb->pending_ios);
- return bio;
+ bio_put(&bbio->bio);
}
/*
@@ -389,18 +288,13 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
struct bio *bio = NULL;
struct compressed_bio *cb;
u64 cur_disk_bytenr = disk_start;
- u64 next_stripe_start;
blk_status_t ret = BLK_STS_OK;
- int skip_sum = inode->flags & BTRFS_INODE_NODATASUM;
- const bool use_append = btrfs_use_zone_append(inode, disk_start);
- const enum req_op bio_op = use_append ? REQ_OP_ZONE_APPEND : REQ_OP_WRITE;
ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
IS_ALIGNED(len, fs_info->sectorsize));
cb = kmalloc(sizeof(struct compressed_bio), GFP_NOFS);
if (!cb)
return BLK_STS_RESOURCE;
- refcount_set(&cb->pending_ios, 1);
cb->status = BLK_STS_OK;
cb->inode = &inode->vfs_inode;
cb->start = start;
@@ -411,8 +305,16 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
INIT_WORK(&cb->write_end_work, btrfs_finish_compressed_write_work);
cb->nr_pages = nr_pages;
- if (blkcg_css)
+ if (blkcg_css) {
kthread_associate_blkcg(blkcg_css);
+ write_flags |= REQ_CGROUP_PUNT;
+ }
+
+ write_flags |= REQ_BTRFS_ONE_ORDERED;
+ bio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_WRITE | write_flags,
+ BTRFS_I(cb->inode), end_compressed_bio_write, cb);
+ bio->bi_iter.bi_sector = cur_disk_bytenr >> SECTOR_SHIFT;
+ btrfs_bio(bio)->file_offset = start;
while (cur_disk_bytenr < disk_start + compressed_len) {
u64 offset = cur_disk_bytenr - disk_start;
@@ -420,77 +322,30 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
unsigned int real_size;
unsigned int added;
struct page *page = compressed_pages[index];
- bool submit = false;
-
- /* Allocate new bio if submitted or not yet allocated */
- if (!bio) {
- bio = alloc_compressed_bio(cb, cur_disk_bytenr,
- bio_op | write_flags, end_compressed_bio_write,
- &next_stripe_start);
- if (IS_ERR(bio)) {
- ret = errno_to_blk_status(PTR_ERR(bio));
- break;
- }
- if (blkcg_css)
- bio->bi_opf |= REQ_CGROUP_PUNT;
- }
- /*
- * We should never reach next_stripe_start start as we will
- * submit comp_bio when reach the boundary immediately.
- */
- ASSERT(cur_disk_bytenr != next_stripe_start);
/*
* We have various limits on the real read size:
- * - stripe boundary
* - page boundary
* - compressed length boundary
*/
- real_size = min_t(u64, U32_MAX, next_stripe_start - cur_disk_bytenr);
- real_size = min_t(u64, real_size, PAGE_SIZE - offset_in_page(offset));
+ real_size = min_t(u64, U32_MAX, PAGE_SIZE - offset_in_page(offset));
real_size = min_t(u64, real_size, compressed_len - offset);
ASSERT(IS_ALIGNED(real_size, fs_info->sectorsize));
- if (use_append)
- added = bio_add_zone_append_page(bio, page, real_size,
- offset_in_page(offset));
- else
- added = bio_add_page(bio, page, real_size,
- offset_in_page(offset));
- /* Reached zoned boundary */
- if (added == 0)
- submit = true;
-
+ added = bio_add_page(bio, page, real_size, offset_in_page(offset));
+ /*
+ * Maximum compressed extent is smaller than bio size limit,
+ * thus bio_add_page() should always success.
+ */
+ ASSERT(added == real_size);
cur_disk_bytenr += added;
- /* Reached stripe boundary */
- if (cur_disk_bytenr == next_stripe_start)
- submit = true;
-
- /* Finished the range */
- if (cur_disk_bytenr == disk_start + compressed_len)
- submit = true;
-
- if (submit) {
- if (!skip_sum) {
- ret = btrfs_csum_one_bio(inode, bio, start, true);
- if (ret) {
- btrfs_bio_end_io(btrfs_bio(bio), ret);
- break;
- }
- }
-
- ASSERT(bio->bi_iter.bi_size);
- btrfs_submit_bio(fs_info, bio, 0);
- bio = NULL;
- }
- cond_resched();
}
+ /* Finished the range. */
+ ASSERT(bio->bi_iter.bi_size);
+ btrfs_submit_bio(bio, 0);
if (blkcg_css)
kthread_associate_blkcg(NULL);
-
- if (refcount_dec_and_test(&cb->pending_ios))
- finish_compressed_bio_write(cb);
return ret;
}
@@ -667,10 +522,9 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
struct extent_map_tree *em_tree;
struct compressed_bio *cb;
unsigned int compressed_len;
- struct bio *comp_bio = NULL;
+ struct bio *comp_bio;
const u64 disk_bytenr = bio->bi_iter.bi_sector << SECTOR_SHIFT;
u64 cur_disk_byte = disk_bytenr;
- u64 next_stripe_start;
u64 file_offset;
u64 em_len;
u64 em_start;
@@ -703,7 +557,6 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
goto out;
}
- refcount_set(&cb->pending_ios, 1);
cb->status = BLK_STS_OK;
cb->inode = inode;
@@ -737,37 +590,23 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
/* include any pages we added in add_ra-bio_pages */
cb->len = bio->bi_iter.bi_size;
+ comp_bio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, BTRFS_I(cb->inode),
+ end_compressed_bio_read, cb);
+ comp_bio->bi_iter.bi_sector = (cur_disk_byte >> SECTOR_SHIFT);
+
while (cur_disk_byte < disk_bytenr + compressed_len) {
u64 offset = cur_disk_byte - disk_bytenr;
unsigned int index = offset >> PAGE_SHIFT;
unsigned int real_size;
unsigned int added;
struct page *page = cb->compressed_pages[index];
- bool submit = false;
-
- /* Allocate new bio if submitted or not yet allocated */
- if (!comp_bio) {
- comp_bio = alloc_compressed_bio(cb, cur_disk_byte,
- REQ_OP_READ, end_compressed_bio_read,
- &next_stripe_start);
- if (IS_ERR(comp_bio)) {
- cb->status = errno_to_blk_status(PTR_ERR(comp_bio));
- break;
- }
- }
- /*
- * We should never reach next_stripe_start start as we will
- * submit comp_bio when reach the boundary immediately.
- */
- ASSERT(cur_disk_byte != next_stripe_start);
+
/*
* We have various limit on the real read size:
- * - stripe boundary
* - page boundary
* - compressed length boundary
*/
- real_size = min_t(u64, U32_MAX, next_stripe_start - cur_disk_byte);
- real_size = min_t(u64, real_size, PAGE_SIZE - offset_in_page(offset));
+ real_size = min_t(u64, U32_MAX, PAGE_SIZE - offset_in_page(offset));
real_size = min_t(u64, real_size, compressed_len - offset);
ASSERT(IS_ALIGNED(real_size, fs_info->sectorsize));
@@ -778,45 +617,20 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
*/
ASSERT(added == real_size);
cur_disk_byte += added;
-
- /* Reached stripe boundary, need to submit */
- if (cur_disk_byte == next_stripe_start)
- submit = true;
-
- /* Has finished the range, need to submit */
- if (cur_disk_byte == disk_bytenr + compressed_len)
- submit = true;
-
- if (submit) {
- /* Save the original iter for read repair */
- if (bio_op(comp_bio) == REQ_OP_READ)
- btrfs_bio(comp_bio)->iter = comp_bio->bi_iter;
-
- /*
- * Save the initial offset of this chunk, as there
- * is no direct correlation between compressed pages and
- * the original file offset. The field is only used for
- * priting error messages.
- */
- btrfs_bio(comp_bio)->file_offset = file_offset;
-
- ret = btrfs_lookup_bio_sums(inode, comp_bio, NULL);
- if (ret) {
- btrfs_bio_end_io(btrfs_bio(comp_bio), ret);
- break;
- }
-
- ASSERT(comp_bio->bi_iter.bi_size);
- btrfs_submit_bio(fs_info, comp_bio, mirror_num);
- comp_bio = NULL;
- }
}
if (memstall)
psi_memstall_leave(&pflags);
- if (refcount_dec_and_test(&cb->pending_ios))
- finish_compressed_bio_read(cb);
+ /*
+ * Stash the initial offset of this chunk, as there is no direct
+ * correlation between compressed pages and the original file offset.
+ * The field is only used for printing error messages anyway.
+ */
+ btrfs_bio(comp_bio)->file_offset = file_offset;
+
+ ASSERT(comp_bio->bi_iter.bi_size);
+ btrfs_submit_bio(comp_bio, mirror_num);
return;
fail:
@@ -1609,7 +1423,7 @@ static void heuristic_collect_sample(struct inode *inode, u64 start, u64 end,
index_end = end >> PAGE_SHIFT;
/* Don't miss unaligned end */
- if (!IS_ALIGNED(end, PAGE_SIZE))
+ if (!PAGE_ALIGNED(end))
index_end++;
curr_sample_pos = 0;
@@ -1642,7 +1456,7 @@ static void heuristic_collect_sample(struct inode *inode, u64 start, u64 end,
*
* For now is's a naive and optimistic 'return true', we'll extend the logic to
* quickly (compared to direct compression) detect data characteristics
- * (compressible/uncompressible) to avoid wasting CPU time on uncompressible
+ * (compressible/incompressible) to avoid wasting CPU time on incompressible
* data.
*
* The following types of analysis can be performed:
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 6209d40a1e08..a5e3377db9ad 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -31,9 +31,6 @@ static_assert((BTRFS_MAX_COMPRESSED % PAGE_SIZE) == 0);
#define BTRFS_ZLIB_DEFAULT_LEVEL 3
struct compressed_bio {
- /* Number of outstanding bios */
- refcount_t pending_ios;
-
/* Number of compressed pages in the array */
unsigned int nr_pages;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 4754c9101a4c..a5b6bb54545f 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -484,7 +484,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
if (ret)
return ret;
}
- btrfs_clean_tree_block(buf);
+ btrfs_clear_buffer_dirty(trans, buf);
*last_ref = 1;
}
return 0;
@@ -853,8 +853,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
/*
* Search for a key in the given extent_buffer.
*
- * The lower boundary for the search is specified by the slot number @low. Use a
- * value of 0 to search over the whole extent buffer.
+ * The lower boundary for the search is specified by the slot number @first_slot.
+ * Use a value of 0 to search over the whole extent buffer.
*
* The slot in the extent buffer is returned via @slot. If the key exists in the
* extent buffer, then @slot will point to the slot where the key is, otherwise
@@ -863,18 +863,23 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
* Slot may point to the total number of items (i.e. one position beyond the last
* key) if the key is bigger than the last key in the extent buffer.
*/
-static noinline int generic_bin_search(struct extent_buffer *eb, int low,
- const struct btrfs_key *key, int *slot)
+int btrfs_generic_bin_search(struct extent_buffer *eb, int first_slot,
+ const struct btrfs_key *key, int *slot)
{
unsigned long p;
int item_size;
- int high = btrfs_header_nritems(eb);
+ /*
+ * Use unsigned types for the low and high slots, so that we get a more
+ * efficient division in the search loop below.
+ */
+ u32 low = first_slot;
+ u32 high = btrfs_header_nritems(eb);
int ret;
const int key_size = sizeof(struct btrfs_disk_key);
- if (low > high) {
+ if (unlikely(low > high)) {
btrfs_err(eb->fs_info,
- "%s: low (%d) > high (%d) eb %llu owner %llu level %d",
+ "%s: low (%u) > high (%u) eb %llu owner %llu level %d",
__func__, low, high, eb->start,
btrfs_header_owner(eb), btrfs_header_level(eb));
return -EINVAL;
@@ -925,16 +930,6 @@ static noinline int generic_bin_search(struct extent_buffer *eb, int low,
return 1;
}
-/*
- * Simple binary search on an extent buffer. Works for both leaves and nodes, and
- * always searches over the whole range of keys (slot 0 to slot 'nritems - 1').
- */
-int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
- int *slot)
-{
- return generic_bin_search(eb, 0, key, slot);
-}
-
static void root_add_used(struct btrfs_root *root, u32 size)
{
spin_lock(&root->accounting_lock);
@@ -1054,7 +1049,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
path->locks[level] = 0;
path->nodes[level] = NULL;
- btrfs_clean_tree_block(mid);
+ btrfs_clear_buffer_dirty(trans, mid);
btrfs_tree_unlock(mid);
/* once for the path */
free_extent_buffer(mid);
@@ -1115,7 +1110,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (wret < 0 && wret != -ENOSPC)
ret = wret;
if (btrfs_header_nritems(right) == 0) {
- btrfs_clean_tree_block(right);
+ btrfs_clear_buffer_dirty(trans, right);
btrfs_tree_unlock(right);
del_ptr(root, path, level + 1, pslot + 1);
root_sub_used(root, right->len);
@@ -1161,7 +1156,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
BUG_ON(wret == 1);
}
if (btrfs_header_nritems(mid) == 0) {
- btrfs_clean_tree_block(mid);
+ btrfs_clear_buffer_dirty(trans, mid);
btrfs_tree_unlock(mid);
del_ptr(root, path, level + 1, pslot);
root_sub_used(root, mid->len);
@@ -1869,7 +1864,7 @@ static inline int search_for_key_slot(struct extent_buffer *eb,
return 0;
}
- return generic_bin_search(eb, search_low_slot, key, slot);
+ return btrfs_generic_bin_search(eb, search_low_slot, key, slot);
}
static int search_leaf(struct btrfs_trans_handle *trans,
@@ -3041,7 +3036,8 @@ noinline int btrfs_leaf_free_space(struct extent_buffer *leaf)
* min slot controls the lowest index we're willing to push to the
* right. We'll push up to and including min_slot, but no lower
*/
-static noinline int __push_leaf_right(struct btrfs_path *path,
+static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
+ struct btrfs_path *path,
int data_size, int empty,
struct extent_buffer *right,
int free_space, u32 left_nritems,
@@ -3139,7 +3135,7 @@ static noinline int __push_leaf_right(struct btrfs_path *path,
if (left_nritems)
btrfs_mark_buffer_dirty(left);
else
- btrfs_clean_tree_block(left);
+ btrfs_clear_buffer_dirty(trans, left);
btrfs_mark_buffer_dirty(right);
@@ -3151,7 +3147,7 @@ static noinline int __push_leaf_right(struct btrfs_path *path,
if (path->slots[0] >= left_nritems) {
path->slots[0] -= left_nritems;
if (btrfs_header_nritems(path->nodes[0]) == 0)
- btrfs_clean_tree_block(path->nodes[0]);
+ btrfs_clear_buffer_dirty(trans, path->nodes[0]);
btrfs_tree_unlock(path->nodes[0]);
free_extent_buffer(path->nodes[0]);
path->nodes[0] = right;
@@ -3243,8 +3239,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
return 0;
}
- return __push_leaf_right(path, min_data_size, empty,
- right, free_space, left_nritems, min_slot);
+ return __push_leaf_right(trans, path, min_data_size, empty, right,
+ free_space, left_nritems, min_slot);
out_unlock:
btrfs_tree_unlock(right);
free_extent_buffer(right);
@@ -3259,7 +3255,8 @@ out_unlock:
* item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the
* items
*/
-static noinline int __push_leaf_left(struct btrfs_path *path, int data_size,
+static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
+ struct btrfs_path *path, int data_size,
int empty, struct extent_buffer *left,
int free_space, u32 right_nritems,
u32 max_slot)
@@ -3363,7 +3360,7 @@ static noinline int __push_leaf_left(struct btrfs_path *path, int data_size,
if (right_nritems)
btrfs_mark_buffer_dirty(right);
else
- btrfs_clean_tree_block(right);
+ btrfs_clear_buffer_dirty(trans, right);
btrfs_item_key(right, &disk_key, 0);
fixup_low_keys(path, &disk_key, 1);
@@ -3449,9 +3446,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
ret = -EUCLEAN;
goto out;
}
- return __push_leaf_left(path, min_data_size,
- empty, left, free_space, right_nritems,
- max_slot);
+ return __push_leaf_left(trans, path, min_data_size, empty, left,
+ free_space, right_nritems, max_slot);
out:
btrfs_tree_unlock(left);
free_extent_buffer(left);
@@ -4400,7 +4396,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
if (leaf == root->node) {
btrfs_set_header_level(leaf, 0);
} else {
- btrfs_clean_tree_block(leaf);
+ btrfs_clear_buffer_dirty(trans, leaf);
btrfs_del_leaf(trans, root, path, leaf);
}
} else {
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 6965703a81b6..97897107fab5 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -507,6 +507,21 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range);
/* ctree.c */
int __init btrfs_ctree_init(void);
void __cold btrfs_ctree_exit(void);
+
+int btrfs_generic_bin_search(struct extent_buffer *eb, int first_slot,
+ const struct btrfs_key *key, int *slot);
+
+/*
+ * Simple binary search on an extent buffer. Works for both leaves and nodes, and
+ * always searches over the whole range of keys (slot 0 to slot 'nritems - 1').
+ */
+static inline int btrfs_bin_search(struct extent_buffer *eb,
+ const struct btrfs_key *key,
+ int *slot)
+{
+ return btrfs_generic_bin_search(eb, 0, key, slot);
+}
+
int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
int *slot);
int __pure btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2);
diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c
index 0a3c261b69c9..8065341d831a 100644
--- a/fs/btrfs/defrag.c
+++ b/fs/btrfs/defrag.c
@@ -358,8 +358,10 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
goto out;
path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
+ if (!path) {
+ ret = -ENOMEM;
+ goto out;
+ }
level = btrfs_header_level(root->node);
@@ -763,7 +765,7 @@ again:
break;
unlock_page(page);
- btrfs_start_ordered_extent(ordered, 1);
+ btrfs_start_ordered_extent(ordered);
btrfs_put_ordered_extent(ordered);
lock_page(page);
/*
@@ -997,7 +999,7 @@ next:
}
#define CLUSTER_SIZE (SZ_256K)
-static_assert(IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE));
+static_assert(PAGE_ALIGNED(CLUSTER_SIZE));
/*
* Defrag one contiguous target range.
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 573ebab886e2..886ffb232eac 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -437,8 +437,7 @@ int btrfs_delayed_ref_lock(struct btrfs_delayed_ref_root *delayed_refs,
return 0;
}
-static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_root *delayed_refs,
+static inline void drop_delayed_ref(struct btrfs_delayed_ref_root *delayed_refs,
struct btrfs_delayed_ref_head *head,
struct btrfs_delayed_ref_node *ref)
{
@@ -452,8 +451,7 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
atomic_dec(&delayed_refs->num_entries);
}
-static bool merge_ref(struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_root *delayed_refs,
+static bool merge_ref(struct btrfs_delayed_ref_root *delayed_refs,
struct btrfs_delayed_ref_head *head,
struct btrfs_delayed_ref_node *ref,
u64 seq)
@@ -482,10 +480,10 @@ static bool merge_ref(struct btrfs_trans_handle *trans,
mod = -next->ref_mod;
}
- drop_delayed_ref(trans, delayed_refs, head, next);
+ drop_delayed_ref(delayed_refs, head, next);
ref->ref_mod += mod;
if (ref->ref_mod == 0) {
- drop_delayed_ref(trans, delayed_refs, head, ref);
+ drop_delayed_ref(delayed_refs, head, ref);
done = true;
} else {
/*
@@ -499,11 +497,10 @@ static bool merge_ref(struct btrfs_trans_handle *trans,
return done;
}
-void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
+void btrfs_merge_delayed_refs(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
struct btrfs_delayed_ref_head *head)
{
- struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_delayed_ref_node *ref;
struct rb_node *node;
u64 seq = 0;
@@ -524,7 +521,7 @@ again:
ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
if (seq && ref->seq >= seq)
continue;
- if (merge_ref(trans, delayed_refs, head, ref, seq))
+ if (merge_ref(delayed_refs, head, ref, seq))
goto again;
}
}
@@ -601,8 +598,7 @@ void btrfs_delete_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
* Return 0 for insert.
* Return >0 for merge.
*/
-static int insert_delayed_ref(struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_root *root,
+static int insert_delayed_ref(struct btrfs_delayed_ref_root *root,
struct btrfs_delayed_ref_head *href,
struct btrfs_delayed_ref_node *ref)
{
@@ -641,7 +637,7 @@ static int insert_delayed_ref(struct btrfs_trans_handle *trans,
/* remove existing tail if its ref_mod is zero */
if (exist->ref_mod == 0)
- drop_delayed_ref(trans, root, href, exist);
+ drop_delayed_ref(root, href, exist);
spin_unlock(&href->lock);
return ret;
inserted:
@@ -978,7 +974,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
head_ref = add_delayed_ref_head(trans, head_ref, record,
action, &qrecord_inserted);
- ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
+ ret = insert_delayed_ref(delayed_refs, head_ref, &ref->node);
spin_unlock(&delayed_refs->lock);
/*
@@ -1070,7 +1066,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
head_ref = add_delayed_ref_head(trans, head_ref, record,
action, &qrecord_inserted);
- ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
+ ret = insert_delayed_ref(delayed_refs, head_ref, &ref->node);
spin_unlock(&delayed_refs->lock);
/*
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index d6304b690ec4..2eb34abf700f 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -357,7 +357,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
struct btrfs_delayed_extent_op *extent_op);
-void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
+void btrfs_merge_delayed_refs(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
struct btrfs_delayed_ref_head *head);
diff --git a/fs/btrfs/discard.c b/fs/btrfs/discard.c
index ff2e524d9937..317aeff6c1da 100644
--- a/fs/btrfs/discard.c
+++ b/fs/btrfs/discard.c
@@ -78,6 +78,7 @@ static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl,
static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
struct btrfs_block_group *block_group)
{
+ lockdep_assert_held(&discard_ctl->lock);
if (!btrfs_run_discard_work(discard_ctl))
return;
@@ -89,6 +90,8 @@ static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
BTRFS_DISCARD_DELAY);
block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
}
+ if (list_empty(&block_group->discard_list))
+ btrfs_get_block_group(block_group);
list_move_tail(&block_group->discard_list,
get_discard_list(discard_ctl, block_group));
@@ -108,8 +111,12 @@ static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
static void add_to_discard_unused_list(struct btrfs_discard_ctl *discard_ctl,
struct btrfs_block_group *block_group)
{
+ bool queued;
+
spin_lock(&discard_ctl->lock);
+ queued = !list_empty(&block_group->discard_list);
+
if (!btrfs_run_discard_work(discard_ctl)) {
spin_unlock(&discard_ctl->lock);
return;
@@ -121,6 +128,8 @@ static void add_to_discard_unused_list(struct btrfs_discard_ctl *discard_ctl,
block_group->discard_eligible_time = (ktime_get_ns() +
BTRFS_DISCARD_UNUSED_DELAY);
block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
+ if (!queued)
+ btrfs_get_block_group(block_group);
list_add_tail(&block_group->discard_list,
&discard_ctl->discard_list[BTRFS_DISCARD_INDEX_UNUSED]);
@@ -131,6 +140,7 @@ static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl,
struct btrfs_block_group *block_group)
{
bool running = false;
+ bool queued = false;
spin_lock(&discard_ctl->lock);
@@ -140,7 +150,16 @@ static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl,
}
block_group->discard_eligible_time = 0;
+ queued = !list_empty(&block_group->discard_list);
list_del_init(&block_group->discard_list);
+ /*
+ * If the block group is currently running in the discard workfn, we
+ * don't want to deref it, since it's still being used by the workfn.
+ * The workfn will notice this case and deref the block group when it is
+ * finished.
+ */
+ if (queued && !running)
+ btrfs_put_block_group(block_group);
spin_unlock(&discard_ctl->lock);
@@ -214,10 +233,12 @@ again:
if (block_group && now >= block_group->discard_eligible_time) {
if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED &&
block_group->used != 0) {
- if (btrfs_is_block_group_data_only(block_group))
+ if (btrfs_is_block_group_data_only(block_group)) {
__add_to_discard_list(discard_ctl, block_group);
- else
+ } else {
list_del_init(&block_group->discard_list);
+ btrfs_put_block_group(block_group);
+ }
goto again;
}
if (block_group->discard_state == BTRFS_DISCARD_RESET_CURSOR) {
@@ -511,6 +532,15 @@ static void btrfs_discard_workfn(struct work_struct *work)
spin_lock(&discard_ctl->lock);
discard_ctl->prev_discard = trimmed;
discard_ctl->prev_discard_time = now;
+ /*
+ * If the block group was removed from the discard list while it was
+ * running in this workfn, then we didn't deref it, since this function
+ * still owned that reference. But we set the discard_ctl->block_group
+ * back to NULL, so we can use that condition to know that now we need
+ * to deref the block_group.
+ */
+ if (discard_ctl->block_group == NULL)
+ btrfs_put_block_group(block_group);
discard_ctl->block_group = NULL;
__btrfs_discard_schedule_work(discard_ctl, now, false);
spin_unlock(&discard_ctl->lock);
@@ -651,8 +681,12 @@ void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info)
list_for_each_entry_safe(block_group, next, &fs_info->unused_bgs,
bg_list) {
list_del_init(&block_group->bg_list);
- btrfs_put_block_group(block_group);
btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
+ /*
+ * This put is for the get done by btrfs_mark_bg_unused.
+ * Queueing discard incremented it for discard's reference.
+ */
+ btrfs_put_block_group(block_group);
}
spin_unlock(&fs_info->unused_bgs_lock);
}
@@ -683,6 +717,7 @@ static void btrfs_discard_purge_list(struct btrfs_discard_ctl *discard_ctl)
if (block_group->used == 0)
btrfs_mark_bg_unused(block_group);
spin_lock(&discard_ctl->lock);
+ btrfs_put_block_group(block_group);
}
}
spin_unlock(&discard_ctl->lock);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 0888d484df80..b53f0e30ce2b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -79,23 +79,6 @@ static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info)
}
/*
- * async submit bios are used to offload expensive checksumming
- * onto the worker threads. They checksum file and metadata bios
- * just before they are sent down the IO stack.
- */
-struct async_submit_bio {
- struct btrfs_inode *inode;
- struct bio *bio;
- enum btrfs_wq_submit_cmd submit_cmd;
- int mirror_num;
-
- /* Optional parameter for used by direct io */
- u64 dio_file_offset;
- struct btrfs_work work;
- blk_status_t status;
-};
-
-/*
* Compute the csum of a btree block and store the result to provided buffer.
*/
static void csum_tree_block(struct extent_buffer *buf, u8 *result)
@@ -367,7 +350,14 @@ error:
btrfs_print_tree(eb, 0);
btrfs_err(fs_info, "block=%llu write time tree block corruption detected",
eb->start);
- WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
+ /*
+ * Be noisy if this is an extent buffer from a log tree. We don't abort
+ * a transaction in case there's a bad log tree extent buffer, we just
+ * fallback to a transaction commit. Still we want to know when there is
+ * a bad log tree extent buffer, as that may signal a bug somewhere.
+ */
+ WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG) ||
+ btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID);
return ret;
}
@@ -448,6 +438,22 @@ static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct bio_vec *bvec
return csum_one_extent_buffer(eb);
}
+blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio)
+{
+ struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
+ struct bvec_iter iter;
+ struct bio_vec bv;
+ int ret = 0;
+
+ bio_for_each_segment(bv, &bbio->bio, iter) {
+ ret = csum_dirty_buffer(fs_info, &bv);
+ if (ret)
+ break;
+ }
+
+ return errno_to_blk_status(ret);
+}
+
static int check_tree_block_fsid(struct extent_buffer *eb)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
@@ -530,6 +536,9 @@ static int validate_extent_buffer(struct extent_buffer *eb,
}
if (found_level != check->level) {
+ btrfs_err(fs_info,
+ "level verify failed on logical %llu mirror %u wanted %u found %u",
+ eb->start, eb->read_mirror, check->level, found_level);
ret = -EIO;
goto out;
}
@@ -690,172 +699,6 @@ err:
return ret;
}
-static void run_one_async_start(struct btrfs_work *work)
-{
- struct async_submit_bio *async;
- blk_status_t ret;
-
- async = container_of(work, struct async_submit_bio, work);
- switch (async->submit_cmd) {
- case WQ_SUBMIT_METADATA:
- ret = btree_submit_bio_start(async->bio);
- break;
- case WQ_SUBMIT_DATA:
- ret = btrfs_submit_bio_start(async->inode, async->bio);
- break;
- case WQ_SUBMIT_DATA_DIO:
- ret = btrfs_submit_bio_start_direct_io(async->inode,
- async->bio, async->dio_file_offset);
- break;
- }
- if (ret)
- async->status = ret;
-}
-
-/*
- * In order to insert checksums into the metadata in large chunks, we wait
- * until bio submission time. All the pages in the bio are checksummed and
- * sums are attached onto the ordered extent record.
- *
- * At IO completion time the csums attached on the ordered extent record are
- * inserted into the tree.
- */
-static void run_one_async_done(struct btrfs_work *work)
-{
- struct async_submit_bio *async =
- container_of(work, struct async_submit_bio, work);
- struct btrfs_inode *inode = async->inode;
- struct btrfs_bio *bbio = btrfs_bio(async->bio);
-
- /* If an error occurred we just want to clean up the bio and move on */
- if (async->status) {
- btrfs_bio_end_io(bbio, async->status);
- return;
- }
-
- /*
- * All of the bios that pass through here are from async helpers.
- * Use REQ_CGROUP_PUNT to issue them from the owning cgroup's context.
- * This changes nothing when cgroups aren't in use.
- */
- async->bio->bi_opf |= REQ_CGROUP_PUNT;
- btrfs_submit_bio(inode->root->fs_info, async->bio, async->mirror_num);
-}
-
-static void run_one_async_free(struct btrfs_work *work)
-{
- struct async_submit_bio *async;
-
- async = container_of(work, struct async_submit_bio, work);
- kfree(async);
-}
-
-/*
- * Submit bio to an async queue.
- *
- * Retrun:
- * - true if the work has been succesfuly submitted
- * - false in case of error
- */
-bool btrfs_wq_submit_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num,
- u64 dio_file_offset, enum btrfs_wq_submit_cmd cmd)
-{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
- struct async_submit_bio *async;
-
- async = kmalloc(sizeof(*async), GFP_NOFS);
- if (!async)
- return false;
-
- async->inode = inode;
- async->bio = bio;
- async->mirror_num = mirror_num;
- async->submit_cmd = cmd;
-
- btrfs_init_work(&async->work, run_one_async_start, run_one_async_done,
- run_one_async_free);
-
- async->dio_file_offset = dio_file_offset;
-
- async->status = 0;
-
- if (op_is_sync(bio->bi_opf))
- btrfs_queue_work(fs_info->hipri_workers, &async->work);
- else
- btrfs_queue_work(fs_info->workers, &async->work);
- return true;
-}
-
-static blk_status_t btree_csum_one_bio(struct bio *bio)
-{
- struct bio_vec *bvec;
- struct btrfs_root *root;
- int ret = 0;
- struct bvec_iter_all iter_all;
-
- ASSERT(!bio_flagged(bio, BIO_CLONED));
- bio_for_each_segment_all(bvec, bio, iter_all) {
- root = BTRFS_I(bvec->bv_page->mapping->host)->root;
- ret = csum_dirty_buffer(root->fs_info, bvec);
- if (ret)
- break;
- }
-
- return errno_to_blk_status(ret);
-}
-
-blk_status_t btree_submit_bio_start(struct bio *bio)
-{
- /*
- * when we're called for a write, we're already in the async
- * submission context. Just jump into btrfs_submit_bio.
- */
- return btree_csum_one_bio(bio);
-}
-
-static bool should_async_write(struct btrfs_fs_info *fs_info,
- struct btrfs_inode *bi)
-{
- if (btrfs_is_zoned(fs_info))
- return false;
- if (atomic_read(&bi->sync_writers))
- return false;
- if (test_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags))
- return false;
- return true;
-}
-
-void btrfs_submit_metadata_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num)
-{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
- struct btrfs_bio *bbio = btrfs_bio(bio);
- blk_status_t ret;
-
- bio->bi_opf |= REQ_META;
- bbio->is_metadata = 1;
-
- if (btrfs_op(bio) != BTRFS_MAP_WRITE) {
- btrfs_submit_bio(fs_info, bio, mirror_num);
- return;
- }
-
- /*
- * Kthread helpers are used to submit writes so that checksumming can
- * happen in parallel across all CPUs.
- */
- if (should_async_write(fs_info, inode) &&
- btrfs_wq_submit_bio(inode, bio, mirror_num, 0, WQ_SUBMIT_METADATA))
- return;
-
- ret = btree_csum_one_bio(bio);
- if (ret) {
- btrfs_bio_end_io(bbio, ret);
- return;
- }
-
- btrfs_submit_bio(fs_info, bio, mirror_num);
-}
-
#ifdef CONFIG_MIGRATION
static int btree_migrate_folio(struct address_space *mapping,
struct folio *dst, struct folio *src, enum migrate_mode mode)
@@ -1025,22 +868,6 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
}
-void btrfs_clean_tree_block(struct extent_buffer *buf)
-{
- struct btrfs_fs_info *fs_info = buf->fs_info;
- if (btrfs_header_generation(buf) ==
- fs_info->running_transaction->transid) {
- btrfs_assert_tree_write_locked(buf);
-
- if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) {
- percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
- -buf->len,
- fs_info->dirty_metadata_batch);
- clear_extent_buffer_dirty(buf);
- }
- }
-}
-
static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
u64 objectid)
{
@@ -1900,6 +1727,9 @@ static int cleaner_kthread(void *arg)
goto sleep;
}
+ if (test_and_clear_bit(BTRFS_FS_FEATURE_CHANGED, &fs_info->flags))
+ btrfs_sysfs_feature_update(fs_info);
+
btrfs_run_delayed_iputs(fs_info);
again = btrfs_clean_one_deleted_snapshot(fs_info);
@@ -3381,6 +3211,8 @@ out:
/*
* Do various sanity and dependency checks of different features.
*
+ * @is_rw_mount: If the mount is read-write.
+ *
* This is the place for less strict checks (like for subpage or artificial
* feature dependencies).
*
@@ -3391,7 +3223,7 @@ out:
* (space cache related) can modify on-disk format like free space tree and
* screw up certain feature dependencies.
*/
-int btrfs_check_features(struct btrfs_fs_info *fs_info, struct super_block *sb)
+int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount)
{
struct btrfs_super_block *disk_super = fs_info->super_copy;
u64 incompat = btrfs_super_incompat_flags(disk_super);
@@ -3430,7 +3262,7 @@ int btrfs_check_features(struct btrfs_fs_info *fs_info, struct super_block *sb)
if (btrfs_super_nodesize(disk_super) > PAGE_SIZE)
incompat |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
- if (compat_ro_unsupp && !sb_rdonly(sb)) {
+ if (compat_ro_unsupp && is_rw_mount) {
btrfs_err(fs_info,
"cannot mount read-write because of unknown compat_ro features (0x%llx)",
compat_ro);
@@ -3633,7 +3465,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
goto fail_alloc;
}
- ret = btrfs_check_features(fs_info, sb);
+ ret = btrfs_check_features(fs_info, !sb_rdonly(sb));
if (ret < 0) {
err = ret;
goto fail_alloc;
@@ -5147,11 +4979,12 @@ static int btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info,
start += fs_info->nodesize;
if (!eb)
continue;
+
+ btrfs_tree_lock(eb);
wait_on_extent_buffer_writeback(eb);
+ btrfs_clear_buffer_dirty(NULL, eb);
+ btrfs_tree_unlock(eb);
- if (test_and_clear_bit(EXTENT_BUFFER_DIRTY,
- &eb->bflags))
- clear_extent_buffer_dirty(eb);
free_extent_buffer_stale(eb);
}
}
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 363935cfc084..4d5772330110 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -39,7 +39,8 @@ struct extent_buffer *btrfs_find_create_tree_block(
struct btrfs_fs_info *fs_info,
u64 bytenr, u64 owner_root,
int level);
-void btrfs_clean_tree_block(struct extent_buffer *buf);
+void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans,
+ struct extent_buffer *buf);
void btrfs_clear_oneshot_options(struct btrfs_fs_info *fs_info);
int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info);
int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
@@ -50,7 +51,7 @@ int __cold open_ctree(struct super_block *sb,
void __cold close_ctree(struct btrfs_fs_info *fs_info);
int btrfs_validate_super(struct btrfs_fs_info *fs_info,
struct btrfs_super_block *sb, int mirror_num);
-int btrfs_check_features(struct btrfs_fs_info *fs_info, struct super_block *sb);
+int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount);
int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors);
struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev);
struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
@@ -86,7 +87,6 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio,
struct page *page, u64 start, u64 end,
int mirror);
-void btrfs_submit_metadata_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info);
#endif
@@ -114,15 +114,7 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
int btrfs_read_extent_buffer(struct extent_buffer *buf,
struct btrfs_tree_parent_check *check);
-enum btrfs_wq_submit_cmd {
- WQ_SUBMIT_METADATA,
- WQ_SUBMIT_DATA,
- WQ_SUBMIT_DATA_DIO,
-};
-
-bool btrfs_wq_submit_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num,
- u64 dio_file_offset, enum btrfs_wq_submit_cmd cmd);
-blk_status_t btree_submit_bio_start(struct bio *bio);
+blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio);
int btrfs_alloc_log_tree_node(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c
index 9ae9cd1e7035..29a225836e28 100644
--- a/fs/btrfs/extent-io-tree.c
+++ b/fs/btrfs/extent-io-tree.c
@@ -972,8 +972,8 @@ static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
{
struct extent_state *state;
struct extent_state *prealloc = NULL;
- struct rb_node **p;
- struct rb_node *parent;
+ struct rb_node **p = NULL;
+ struct rb_node *parent = NULL;
int err = 0;
u64 last_start;
u64 last_end;
@@ -1218,8 +1218,8 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
{
struct extent_state *state;
struct extent_state *prealloc = NULL;
- struct rb_node **p;
- struct rb_node *parent;
+ struct rb_node **p = NULL;
+ struct rb_node *parent = NULL;
int err = 0;
u64 last_start;
u64 last_end;
@@ -1551,7 +1551,7 @@ u64 count_range_bits(struct extent_io_tree *tree,
u64 last = 0;
int found = 0;
- if (WARN_ON(search_end <= cur_start))
+ if (WARN_ON(search_end < cur_start))
return 0;
spin_lock(&tree->lock);
@@ -1625,7 +1625,7 @@ search:
}
/*
- * Searche a range in the state tree for a given mask. If 'filled' == 1, this
+ * Search a range in the state tree for a given mask. If 'filled' == 1, this
* returns 1 only if every extent in the tree has the bits set. Otherwise, 1
* is returned if any bit in the range is found set.
*/
diff --git a/fs/btrfs/extent-io-tree.h b/fs/btrfs/extent-io-tree.h
index e3eeec380844..21766e49ec02 100644
--- a/fs/btrfs/extent-io-tree.h
+++ b/fs/btrfs/extent-io-tree.h
@@ -6,7 +6,6 @@
#include "misc.h"
struct extent_changeset;
-struct io_failure_record;
/* Bits for the extent state */
enum {
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 892d78c1853c..824c657f59e8 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -16,7 +16,8 @@
#include <linux/percpu_counter.h>
#include <linux/lockdep.h>
#include <linux/crc32c.h>
-#include "misc.h"
+#include "ctree.h"
+#include "extent-tree.h"
#include "tree-log.h"
#include "disk-io.h"
#include "print-tree.h"
@@ -31,14 +32,12 @@
#include "space-info.h"
#include "block-rsv.h"
#include "delalloc-space.h"
-#include "block-group.h"
#include "discard.h"
#include "rcu-string.h"
#include "zoned.h"
#include "dev-replace.h"
#include "fs.h"
#include "accessors.h"
-#include "extent-tree.h"
#include "root-tree.h"
#include "file-item.h"
#include "orphan.h"
@@ -1713,6 +1712,11 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
BUG();
if (ret && insert_reserved)
btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
+ if (ret < 0)
+ btrfs_err(trans->fs_info,
+"failed to run delayed ref for logical %llu num_bytes %llu type %u action %u ref_mod %d: %d",
+ node->bytenr, node->num_bytes, node->type,
+ node->action, node->ref_mod, ret);
return ret;
}
@@ -1954,8 +1958,6 @@ static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
if (ret) {
unselect_delayed_ref_head(delayed_refs, locked_ref);
btrfs_put_delayed_ref(ref);
- btrfs_debug(fs_info, "run_one_delayed_ref returned %d",
- ret);
return ret;
}
@@ -1963,7 +1965,7 @@ static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
cond_resched();
spin_lock(&locked_ref->lock);
- btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
+ btrfs_merge_delayed_refs(fs_info, delayed_refs, locked_ref);
}
return 0;
@@ -2010,7 +2012,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
* insert_inline_extent_backref()).
*/
spin_lock(&locked_ref->lock);
- btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
+ btrfs_merge_delayed_refs(fs_info, delayed_refs, locked_ref);
ret = btrfs_run_delayed_refs_for_head(trans, locked_ref,
&actual_count);
@@ -3382,7 +3384,9 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
enum btrfs_loop_type {
LOOP_CACHING_NOWAIT,
LOOP_CACHING_WAIT,
+ LOOP_UNSET_SIZE_CLASS,
LOOP_ALLOC_CHUNK,
+ LOOP_WRONG_SIZE_CLASS,
LOOP_NO_EMPTY_SIZE,
};
@@ -3450,81 +3454,6 @@ btrfs_release_block_group(struct btrfs_block_group *cache,
btrfs_put_block_group(cache);
}
-enum btrfs_extent_allocation_policy {
- BTRFS_EXTENT_ALLOC_CLUSTERED,
- BTRFS_EXTENT_ALLOC_ZONED,
-};
-
-/*
- * Structure used internally for find_free_extent() function. Wraps needed
- * parameters.
- */
-struct find_free_extent_ctl {
- /* Basic allocation info */
- u64 ram_bytes;
- u64 num_bytes;
- u64 min_alloc_size;
- u64 empty_size;
- u64 flags;
- int delalloc;
-
- /* Where to start the search inside the bg */
- u64 search_start;
-
- /* For clustered allocation */
- u64 empty_cluster;
- struct btrfs_free_cluster *last_ptr;
- bool use_cluster;
-
- bool have_caching_bg;
- bool orig_have_caching_bg;
-
- /* Allocation is called for tree-log */
- bool for_treelog;
-
- /* Allocation is called for data relocation */
- bool for_data_reloc;
-
- /* RAID index, converted from flags */
- int index;
-
- /*
- * Current loop number, check find_free_extent_update_loop() for details
- */
- int loop;
-
- /*
- * Whether we're refilling a cluster, if true we need to re-search
- * current block group but don't try to refill the cluster again.
- */
- bool retry_clustered;
-
- /*
- * Whether we're updating free space cache, if true we need to re-search
- * current block group but don't try updating free space cache again.
- */
- bool retry_unclustered;
-
- /* If current block group is cached */
- int cached;
-
- /* Max contiguous hole found */
- u64 max_extent_size;
-
- /* Total free space from free space cache, not always contiguous */
- u64 total_free_space;
-
- /* Found result */
- u64 found_offset;
-
- /* Hint where to start looking for an empty space */
- u64 hint_byte;
-
- /* Allocation policy */
- enum btrfs_extent_allocation_policy policy;
-};
-
-
/*
* Helper function for find_free_extent().
*
@@ -3556,8 +3485,7 @@ static int find_free_extent_clustered(struct btrfs_block_group *bg,
if (offset) {
/* We have a block, we're done */
spin_unlock(&last_ptr->refill_lock);
- trace_btrfs_reserve_extent_cluster(cluster_bg,
- ffe_ctl->search_start, ffe_ctl->num_bytes);
+ trace_btrfs_reserve_extent_cluster(cluster_bg, ffe_ctl);
*cluster_bg_ret = cluster_bg;
ffe_ctl->found_offset = offset;
return 0;
@@ -3607,10 +3535,8 @@ refill_cluster:
if (offset) {
/* We found one, proceed */
spin_unlock(&last_ptr->refill_lock);
- trace_btrfs_reserve_extent_cluster(bg,
- ffe_ctl->search_start,
- ffe_ctl->num_bytes);
ffe_ctl->found_offset = offset;
+ trace_btrfs_reserve_extent_cluster(bg, ffe_ctl);
return 0;
}
} else if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
@@ -4025,24 +3951,6 @@ static int can_allocate_chunk(struct btrfs_fs_info *fs_info,
}
}
-static int chunk_allocation_failed(struct find_free_extent_ctl *ffe_ctl)
-{
- switch (ffe_ctl->policy) {
- case BTRFS_EXTENT_ALLOC_CLUSTERED:
- /*
- * If we can't allocate a new chunk we've already looped through
- * at least once, move on to the NO_EMPTY_SIZE case.
- */
- ffe_ctl->loop = LOOP_NO_EMPTY_SIZE;
- return 0;
- case BTRFS_EXTENT_ALLOC_ZONED:
- /* Give up here */
- return -ENOSPC;
- default:
- BUG();
- }
-}
-
/*
* Return >0 means caller needs to re-search for free extent
* Return 0 means we have the needed free extent.
@@ -4076,31 +3984,28 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
* LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
* caching kthreads as we move along
* LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
+ * LOOP_UNSET_SIZE_CLASS, allow unset size class
* LOOP_ALLOC_CHUNK, force a chunk allocation and try again
* LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
* again
*/
if (ffe_ctl->loop < LOOP_NO_EMPTY_SIZE) {
ffe_ctl->index = 0;
- if (ffe_ctl->loop == LOOP_CACHING_NOWAIT) {
- /*
- * We want to skip the LOOP_CACHING_WAIT step if we
- * don't have any uncached bgs and we've already done a
- * full search through.
- */
- if (ffe_ctl->orig_have_caching_bg || !full_search)
- ffe_ctl->loop = LOOP_CACHING_WAIT;
- else
- ffe_ctl->loop = LOOP_ALLOC_CHUNK;
- } else {
+ /*
+ * We want to skip the LOOP_CACHING_WAIT step if we don't have
+ * any uncached bgs and we've already done a full search
+ * through.
+ */
+ if (ffe_ctl->loop == LOOP_CACHING_NOWAIT &&
+ (!ffe_ctl->orig_have_caching_bg && full_search))
ffe_ctl->loop++;
- }
+ ffe_ctl->loop++;
if (ffe_ctl->loop == LOOP_ALLOC_CHUNK) {
struct btrfs_trans_handle *trans;
int exist = 0;
- /*Check if allocation policy allows to create a new chunk */
+ /* Check if allocation policy allows to create a new chunk */
ret = can_allocate_chunk(fs_info, ffe_ctl);
if (ret)
return ret;
@@ -4120,8 +4025,10 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
CHUNK_ALLOC_FORCE_FOR_EXTENT);
/* Do not bail out on ENOSPC since we can do more. */
- if (ret == -ENOSPC)
- ret = chunk_allocation_failed(ffe_ctl);
+ if (ret == -ENOSPC) {
+ ret = 0;
+ ffe_ctl->loop++;
+ }
else if (ret < 0)
btrfs_abort_transaction(trans, ret);
else
@@ -4151,6 +4058,21 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
return -ENOSPC;
}
+static bool find_free_extent_check_size_class(struct find_free_extent_ctl *ffe_ctl,
+ struct btrfs_block_group *bg)
+{
+ if (ffe_ctl->policy == BTRFS_EXTENT_ALLOC_ZONED)
+ return true;
+ if (!btrfs_block_group_should_use_size_class(bg))
+ return true;
+ if (ffe_ctl->loop >= LOOP_WRONG_SIZE_CLASS)
+ return true;
+ if (ffe_ctl->loop >= LOOP_UNSET_SIZE_CLASS &&
+ bg->size_class == BTRFS_BG_SZ_NONE)
+ return true;
+ return ffe_ctl->size_class == bg->size_class;
+}
+
static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info,
struct find_free_extent_ctl *ffe_ctl,
struct btrfs_space_info *space_info,
@@ -4285,6 +4207,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
ffe_ctl->total_free_space = 0;
ffe_ctl->found_offset = 0;
ffe_ctl->policy = BTRFS_EXTENT_ALLOC_CLUSTERED;
+ ffe_ctl->size_class = btrfs_calc_block_group_size_class(ffe_ctl->num_bytes);
if (btrfs_is_zoned(fs_info))
ffe_ctl->policy = BTRFS_EXTENT_ALLOC_ZONED;
@@ -4293,8 +4216,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
ins->objectid = 0;
ins->offset = 0;
- trace_find_free_extent(root, ffe_ctl->num_bytes, ffe_ctl->empty_size,
- ffe_ctl->flags);
+ trace_find_free_extent(root, ffe_ctl);
space_info = btrfs_find_space_info(fs_info, ffe_ctl->flags);
if (!space_info) {
@@ -4337,6 +4259,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
block_group->flags);
btrfs_lock_block_group(block_group,
ffe_ctl->delalloc);
+ ffe_ctl->hinted = true;
goto have_block_group;
}
} else if (block_group) {
@@ -4344,6 +4267,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
}
}
search:
+ trace_find_free_extent_search_loop(root, ffe_ctl);
ffe_ctl->have_caching_bg = false;
if (ffe_ctl->index == btrfs_bg_flags_to_raid_index(ffe_ctl->flags) ||
ffe_ctl->index == 0)
@@ -4353,6 +4277,7 @@ search:
&space_info->block_groups[ffe_ctl->index], list) {
struct btrfs_block_group *bg_ret;
+ ffe_ctl->hinted = false;
/* If the block group is read-only, we can skip it entirely. */
if (unlikely(block_group->ro)) {
if (ffe_ctl->for_treelog)
@@ -4394,6 +4319,7 @@ search:
}
have_block_group:
+ trace_find_free_extent_have_block_group(root, ffe_ctl, block_group);
ffe_ctl->cached = btrfs_block_group_done(block_group);
if (unlikely(!ffe_ctl->cached)) {
ffe_ctl->have_caching_bg = true;
@@ -4418,6 +4344,9 @@ have_block_group:
if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
goto loop;
+ if (!find_free_extent_check_size_class(ffe_ctl, block_group))
+ goto loop;
+
bg_ret = NULL;
ret = do_allocation(block_group, ffe_ctl, &bg_ret);
if (ret == 0) {
@@ -4452,7 +4381,8 @@ have_block_group:
ret = btrfs_add_reserved_bytes(block_group, ffe_ctl->ram_bytes,
ffe_ctl->num_bytes,
- ffe_ctl->delalloc);
+ ffe_ctl->delalloc,
+ ffe_ctl->loop >= LOOP_WRONG_SIZE_CLASS);
if (ret == -EAGAIN) {
btrfs_add_free_space_unused(block_group,
ffe_ctl->found_offset,
@@ -4465,8 +4395,7 @@ have_block_group:
ins->objectid = ffe_ctl->search_start;
ins->offset = ffe_ctl->num_bytes;
- trace_btrfs_reserve_extent(block_group, ffe_ctl->search_start,
- ffe_ctl->num_bytes);
+ trace_btrfs_reserve_extent(block_group, ffe_ctl);
btrfs_release_block_group(block_group, ffe_ctl->delalloc);
break;
loop:
@@ -4909,7 +4838,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
btrfs_set_buffer_lockdep_class(lockdep_owner, buf, level);
__btrfs_tree_lock(buf, nest);
- btrfs_clean_tree_block(buf);
+ btrfs_clear_buffer_dirty(trans, buf);
clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
clear_bit(EXTENT_BUFFER_NO_CHECK, &buf->bflags);
@@ -5539,13 +5468,12 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
}
}
}
- /* make block locked assertion in btrfs_clean_tree_block happy */
- if (!path->locks[level] &&
- btrfs_header_generation(eb) == trans->transid) {
+ /* Make block locked assertion in btrfs_clear_buffer_dirty happy. */
+ if (!path->locks[level]) {
btrfs_tree_lock(eb);
path->locks[level] = BTRFS_WRITE_LOCK;
}
- btrfs_clean_tree_block(eb);
+ btrfs_clear_buffer_dirty(trans, eb);
}
if (eb == root->node) {
diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h
index ae5425253603..0c958fc1b3b8 100644
--- a/fs/btrfs/extent-tree.h
+++ b/fs/btrfs/extent-tree.h
@@ -3,6 +3,87 @@
#ifndef BTRFS_EXTENT_TREE_H
#define BTRFS_EXTENT_TREE_H
+#include "misc.h"
+#include "block-group.h"
+
+struct btrfs_free_cluster;
+
+enum btrfs_extent_allocation_policy {
+ BTRFS_EXTENT_ALLOC_CLUSTERED,
+ BTRFS_EXTENT_ALLOC_ZONED,
+};
+
+struct find_free_extent_ctl {
+ /* Basic allocation info */
+ u64 ram_bytes;
+ u64 num_bytes;
+ u64 min_alloc_size;
+ u64 empty_size;
+ u64 flags;
+ int delalloc;
+
+ /* Where to start the search inside the bg */
+ u64 search_start;
+
+ /* For clustered allocation */
+ u64 empty_cluster;
+ struct btrfs_free_cluster *last_ptr;
+ bool use_cluster;
+
+ bool have_caching_bg;
+ bool orig_have_caching_bg;
+
+ /* Allocation is called for tree-log */
+ bool for_treelog;
+
+ /* Allocation is called for data relocation */
+ bool for_data_reloc;
+
+ /* RAID index, converted from flags */
+ int index;
+
+ /*
+ * Current loop number, check find_free_extent_update_loop() for details
+ */
+ int loop;
+
+ /*
+ * Whether we're refilling a cluster, if true we need to re-search
+ * current block group but don't try to refill the cluster again.
+ */
+ bool retry_clustered;
+
+ /*
+ * Whether we're updating free space cache, if true we need to re-search
+ * current block group but don't try updating free space cache again.
+ */
+ bool retry_unclustered;
+
+ /* If current block group is cached */
+ int cached;
+
+ /* Max contiguous hole found */
+ u64 max_extent_size;
+
+ /* Total free space from free space cache, not always contiguous */
+ u64 total_free_space;
+
+ /* Found result */
+ u64 found_offset;
+
+ /* Hint where to start looking for an empty space */
+ u64 hint_byte;
+
+ /* Allocation policy */
+ enum btrfs_extent_allocation_policy policy;
+
+ /* Whether or not the allocator is currently following a hint */
+ bool hinted;
+
+ /* Size class of block groups to prefer in early loops */
+ enum btrfs_block_group_size_class size_class;
+};
+
enum btrfs_inline_ref_type {
BTRFS_REF_TYPE_INVALID,
BTRFS_REF_TYPE_BLOCK,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 83dd3aa59663..c25fa74d7615 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -36,6 +36,7 @@
#include "file.h"
#include "dev-replace.h"
#include "super.h"
+#include "transaction.h"
static struct kmem_cache *extent_buffer_cache;
@@ -99,11 +100,19 @@ struct btrfs_bio_ctrl {
struct bio *bio;
int mirror_num;
enum btrfs_compression_type compress_type;
- u32 len_to_stripe_boundary;
u32 len_to_oe_boundary;
btrfs_bio_end_io_t end_io_func;
/*
+ * This is for metadata read, to provide the extra needed verification
+ * info. This has to be provided for submit_one_bio(), as
+ * submit_one_bio() can submit a bio if it ends at stripe boundary. If
+ * no such parent_check is provided, the metadata can hit false alert at
+ * endio time.
+ */
+ struct btrfs_tree_parent_check *parent_check;
+
+ /*
* Tell writepage not to lock the state bits for this range, it still
* does the unlocking.
*/
@@ -117,7 +126,7 @@ static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
{
struct bio *bio;
struct bio_vec *bv;
- struct btrfs_inode *inode;
+ struct inode *inode;
int mirror_num;
if (!bio_ctrl->bio)
@@ -125,21 +134,31 @@ static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
bio = bio_ctrl->bio;
bv = bio_first_bvec_all(bio);
- inode = BTRFS_I(bv->bv_page->mapping->host);
+ inode = bv->bv_page->mapping->host;
mirror_num = bio_ctrl->mirror_num;
/* Caller should ensure the bio has at least some range added */
ASSERT(bio->bi_iter.bi_size);
- btrfs_bio(bio)->file_offset = page_offset(bv->bv_page) + bv->bv_offset;
+ if (!is_data_inode(inode)) {
+ if (btrfs_op(bio) != BTRFS_MAP_WRITE) {
+ /*
+ * For metadata read, we should have the parent_check,
+ * and copy it to bbio for metadata verification.
+ */
+ ASSERT(bio_ctrl->parent_check);
+ memcpy(&btrfs_bio(bio)->parent_check,
+ bio_ctrl->parent_check,
+ sizeof(struct btrfs_tree_parent_check));
+ }
+ bio->bi_opf |= REQ_META;
+ }
- if (!is_data_inode(&inode->vfs_inode))
- btrfs_submit_metadata_bio(inode, bio, mirror_num);
- else if (btrfs_op(bio) == BTRFS_MAP_WRITE)
- btrfs_submit_data_write_bio(inode, bio, mirror_num);
+ if (btrfs_op(bio) == BTRFS_MAP_READ &&
+ bio_ctrl->compress_type != BTRFS_COMPRESS_NONE)
+ btrfs_submit_compressed_read(inode, bio, mirror_num);
else
- btrfs_submit_data_read_bio(inode, bio, mirror_num,
- bio_ctrl->compress_type);
+ btrfs_submit_bio(bio, mirror_num);
/* The bio is owned by the end_io handler now */
bio_ctrl->bio = NULL;
@@ -495,266 +514,6 @@ void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
start, end, page_ops, NULL);
}
-static int insert_failrec(struct btrfs_inode *inode,
- struct io_failure_record *failrec)
-{
- struct rb_node *exist;
-
- spin_lock(&inode->io_failure_lock);
- exist = rb_simple_insert(&inode->io_failure_tree, failrec->bytenr,
- &failrec->rb_node);
- spin_unlock(&inode->io_failure_lock);
-
- return (exist == NULL) ? 0 : -EEXIST;
-}
-
-static struct io_failure_record *get_failrec(struct btrfs_inode *inode, u64 start)
-{
- struct rb_node *node;
- struct io_failure_record *failrec = ERR_PTR(-ENOENT);
-
- spin_lock(&inode->io_failure_lock);
- node = rb_simple_search(&inode->io_failure_tree, start);
- if (node)
- failrec = rb_entry(node, struct io_failure_record, rb_node);
- spin_unlock(&inode->io_failure_lock);
- return failrec;
-}
-
-static void free_io_failure(struct btrfs_inode *inode,
- struct io_failure_record *rec)
-{
- spin_lock(&inode->io_failure_lock);
- rb_erase(&rec->rb_node, &inode->io_failure_tree);
- spin_unlock(&inode->io_failure_lock);
-
- kfree(rec);
-}
-
-static int next_mirror(const struct io_failure_record *failrec, int cur_mirror)
-{
- if (cur_mirror == failrec->num_copies)
- return cur_mirror + 1 - failrec->num_copies;
- return cur_mirror + 1;
-}
-
-static int prev_mirror(const struct io_failure_record *failrec, int cur_mirror)
-{
- if (cur_mirror == 1)
- return failrec->num_copies;
- return cur_mirror - 1;
-}
-
-/*
- * each time an IO finishes, we do a fast check in the IO failure tree
- * to see if we need to process or clean up an io_failure_record
- */
-int btrfs_clean_io_failure(struct btrfs_inode *inode, u64 start,
- struct page *page, unsigned int pg_offset)
-{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
- struct extent_io_tree *io_tree = &inode->io_tree;
- u64 ino = btrfs_ino(inode);
- u64 locked_start, locked_end;
- struct io_failure_record *failrec;
- int mirror;
- int ret;
-
- failrec = get_failrec(inode, start);
- if (IS_ERR(failrec))
- return 0;
-
- BUG_ON(!failrec->this_mirror);
-
- if (sb_rdonly(fs_info->sb))
- goto out;
-
- ret = find_first_extent_bit(io_tree, failrec->bytenr, &locked_start,
- &locked_end, EXTENT_LOCKED, NULL);
- if (ret || locked_start > failrec->bytenr ||
- locked_end < failrec->bytenr + failrec->len - 1)
- goto out;
-
- mirror = failrec->this_mirror;
- do {
- mirror = prev_mirror(failrec, mirror);
- btrfs_repair_io_failure(fs_info, ino, start, failrec->len,
- failrec->logical, page, pg_offset, mirror);
- } while (mirror != failrec->failed_mirror);
-
-out:
- free_io_failure(inode, failrec);
- return 0;
-}
-
-/*
- * Can be called when
- * - hold extent lock
- * - under ordered extent
- * - the inode is freeing
- */
-void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end)
-{
- struct io_failure_record *failrec;
- struct rb_node *node, *next;
-
- if (RB_EMPTY_ROOT(&inode->io_failure_tree))
- return;
-
- spin_lock(&inode->io_failure_lock);
- node = rb_simple_search_first(&inode->io_failure_tree, start);
- while (node) {
- failrec = rb_entry(node, struct io_failure_record, rb_node);
- if (failrec->bytenr > end)
- break;
-
- next = rb_next(node);
- rb_erase(&failrec->rb_node, &inode->io_failure_tree);
- kfree(failrec);
-
- node = next;
- }
- spin_unlock(&inode->io_failure_lock);
-}
-
-static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode,
- struct btrfs_bio *bbio,
- unsigned int bio_offset)
-{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- u64 start = bbio->file_offset + bio_offset;
- struct io_failure_record *failrec;
- const u32 sectorsize = fs_info->sectorsize;
- int ret;
-
- failrec = get_failrec(BTRFS_I(inode), start);
- if (!IS_ERR(failrec)) {
- btrfs_debug(fs_info,
- "Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu",
- failrec->logical, failrec->bytenr, failrec->len);
- /*
- * when data can be on disk more than twice, add to failrec here
- * (e.g. with a list for failed_mirror) to make
- * clean_io_failure() clean all those errors at once.
- */
- ASSERT(failrec->this_mirror == bbio->mirror_num);
- ASSERT(failrec->len == fs_info->sectorsize);
- return failrec;
- }
-
- failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
- if (!failrec)
- return ERR_PTR(-ENOMEM);
-
- RB_CLEAR_NODE(&failrec->rb_node);
- failrec->bytenr = start;
- failrec->len = sectorsize;
- failrec->failed_mirror = bbio->mirror_num;
- failrec->this_mirror = bbio->mirror_num;
- failrec->logical = (bbio->iter.bi_sector << SECTOR_SHIFT) + bio_offset;
-
- btrfs_debug(fs_info,
- "new io failure record logical %llu start %llu",
- failrec->logical, start);
-
- failrec->num_copies = btrfs_num_copies(fs_info, failrec->logical, sectorsize);
- if (failrec->num_copies == 1) {
- /*
- * We only have a single copy of the data, so don't bother with
- * all the retry and error correction code that follows. No
- * matter what the error is, it is very likely to persist.
- */
- btrfs_debug(fs_info,
- "cannot repair logical %llu num_copies %d",
- failrec->logical, failrec->num_copies);
- kfree(failrec);
- return ERR_PTR(-EIO);
- }
-
- /* Set the bits in the private failure tree */
- ret = insert_failrec(BTRFS_I(inode), failrec);
- if (ret) {
- kfree(failrec);
- return ERR_PTR(ret);
- }
-
- return failrec;
-}
-
-int btrfs_repair_one_sector(struct btrfs_inode *inode, struct btrfs_bio *failed_bbio,
- u32 bio_offset, struct page *page, unsigned int pgoff,
- bool submit_buffered)
-{
- u64 start = failed_bbio->file_offset + bio_offset;
- struct io_failure_record *failrec;
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
- struct bio *failed_bio = &failed_bbio->bio;
- const int icsum = bio_offset >> fs_info->sectorsize_bits;
- struct bio *repair_bio;
- struct btrfs_bio *repair_bbio;
-
- btrfs_debug(fs_info,
- "repair read error: read error at %llu", start);
-
- BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
-
- failrec = btrfs_get_io_failure_record(&inode->vfs_inode, failed_bbio, bio_offset);
- if (IS_ERR(failrec))
- return PTR_ERR(failrec);
-
- /*
- * There are two premises:
- * a) deliver good data to the caller
- * b) correct the bad sectors on disk
- *
- * Since we're only doing repair for one sector, we only need to get
- * a good copy of the failed sector and if we succeed, we have setup
- * everything for btrfs_repair_io_failure to do the rest for us.
- */
- failrec->this_mirror = next_mirror(failrec, failrec->this_mirror);
- if (failrec->this_mirror == failrec->failed_mirror) {
- btrfs_debug(fs_info,
- "failed to repair num_copies %d this_mirror %d failed_mirror %d",
- failrec->num_copies, failrec->this_mirror, failrec->failed_mirror);
- free_io_failure(inode, failrec);
- return -EIO;
- }
-
- repair_bio = btrfs_bio_alloc(1, REQ_OP_READ, failed_bbio->end_io,
- failed_bbio->private);
- repair_bbio = btrfs_bio(repair_bio);
- repair_bbio->file_offset = start;
- repair_bio->bi_iter.bi_sector = failrec->logical >> 9;
-
- if (failed_bbio->csum) {
- const u32 csum_size = fs_info->csum_size;
-
- repair_bbio->csum = repair_bbio->csum_inline;
- memcpy(repair_bbio->csum,
- failed_bbio->csum + csum_size * icsum, csum_size);
- }
-
- bio_add_page(repair_bio, page, failrec->len, pgoff);
- repair_bbio->iter = repair_bio->bi_iter;
-
- btrfs_debug(fs_info,
- "repair read error: submitting new read to mirror %d",
- failrec->this_mirror);
-
- /*
- * At this point we have a bio, so any errors from bio submission will
- * be handled by the endio on the repair_bio, so we can't return an
- * error here.
- */
- if (submit_buffered)
- btrfs_submit_data_read_bio(inode, repair_bio,
- failrec->this_mirror, 0);
- else
- btrfs_submit_dio_repair_bio(inode, repair_bio, failrec->this_mirror);
-
- return BLK_STS_OK;
-}
-
static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
{
struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
@@ -783,79 +542,6 @@ static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
btrfs_subpage_end_reader(fs_info, page, start, len);
}
-static void end_sector_io(struct page *page, u64 offset, bool uptodate)
-{
- struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
- const u32 sectorsize = inode->root->fs_info->sectorsize;
-
- end_page_read(page, uptodate, offset, sectorsize);
- unlock_extent(&inode->io_tree, offset, offset + sectorsize - 1, NULL);
-}
-
-static void submit_data_read_repair(struct inode *inode,
- struct btrfs_bio *failed_bbio,
- u32 bio_offset, const struct bio_vec *bvec,
- unsigned int error_bitmap)
-{
- const unsigned int pgoff = bvec->bv_offset;
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct page *page = bvec->bv_page;
- const u64 start = page_offset(bvec->bv_page) + bvec->bv_offset;
- const u64 end = start + bvec->bv_len - 1;
- const u32 sectorsize = fs_info->sectorsize;
- const int nr_bits = (end + 1 - start) >> fs_info->sectorsize_bits;
- int i;
-
- BUG_ON(bio_op(&failed_bbio->bio) == REQ_OP_WRITE);
-
- /* This repair is only for data */
- ASSERT(is_data_inode(inode));
-
- /* We're here because we had some read errors or csum mismatch */
- ASSERT(error_bitmap);
-
- /*
- * We only get called on buffered IO, thus page must be mapped and bio
- * must not be cloned.
- */
- ASSERT(page->mapping && !bio_flagged(&failed_bbio->bio, BIO_CLONED));
-
- /* Iterate through all the sectors in the range */
- for (i = 0; i < nr_bits; i++) {
- const unsigned int offset = i * sectorsize;
- bool uptodate = false;
- int ret;
-
- if (!(error_bitmap & (1U << i))) {
- /*
- * This sector has no error, just end the page read
- * and unlock the range.
- */
- uptodate = true;
- goto next;
- }
-
- ret = btrfs_repair_one_sector(BTRFS_I(inode), failed_bbio,
- bio_offset + offset, page, pgoff + offset,
- true);
- if (!ret) {
- /*
- * We have submitted the read repair, the page release
- * will be handled by the endio function of the
- * submitted repair bio.
- * Thus we don't need to do any thing here.
- */
- continue;
- }
- /*
- * Continue on failed repair, otherwise the remaining sectors
- * will not be properly unlocked.
- */
-next:
- end_sector_io(page, start + offset, uptodate);
- }
-}
-
/* lots and lots of room for performance fixes in the end_bio funcs */
void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
@@ -899,7 +585,6 @@ static void end_bio_extent_writepage(struct btrfs_bio *bbio)
u64 start;
u64 end;
struct bvec_iter_all iter_all;
- bool first_bvec = true;
ASSERT(!bio_flagged(bio, BIO_CLONED));
bio_for_each_segment_all(bvec, bio, iter_all) {
@@ -921,11 +606,6 @@ static void end_bio_extent_writepage(struct btrfs_bio *bbio)
start = page_offset(page) + bvec->bv_offset;
end = start + bvec->bv_len - 1;
- if (first_bvec) {
- btrfs_record_physical_zoned(inode, start, bio);
- first_bvec = false;
- }
-
end_extent_writepage(page, error, start, end);
btrfs_page_clear_writeback(fs_info, page, start, bvec->bv_len);
@@ -1073,8 +753,6 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio)
struct inode *inode = page->mapping->host;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
const u32 sectorsize = fs_info->sectorsize;
- unsigned int error_bitmap = (unsigned int)-1;
- bool repair = false;
u64 start;
u64 end;
u32 len;
@@ -1106,25 +784,14 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio)
len = bvec->bv_len;
mirror = bbio->mirror_num;
- if (likely(uptodate)) {
- if (is_data_inode(inode)) {
- error_bitmap = btrfs_verify_data_csum(bbio,
- bio_offset, page, start, end);
- if (error_bitmap)
- uptodate = false;
- } else {
- if (btrfs_validate_metadata_buffer(bbio,
- page, start, end, mirror))
- uptodate = false;
- }
- }
+ if (uptodate && !is_data_inode(inode) &&
+ btrfs_validate_metadata_buffer(bbio, page, start, end, mirror))
+ uptodate = false;
if (likely(uptodate)) {
loff_t i_size = i_size_read(inode);
pgoff_t end_index = i_size >> PAGE_SHIFT;
- btrfs_clean_io_failure(BTRFS_I(inode), start, page, 0);
-
/*
* Zero out the remaining part if this range straddles
* i_size.
@@ -1141,19 +808,7 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio)
zero_user_segment(page, zero_start,
offset_in_page(end) + 1);
}
- } else if (is_data_inode(inode)) {
- /*
- * Only try to repair bios that actually made it to a
- * device. If the bio failed to be submitted mirror
- * is 0 and we need to fail it without retrying.
- *
- * This also includes the high level bios for compressed
- * extents - these never make it to a device and repair
- * is already handled on the lower compressed bio.
- */
- if (mirror > 0)
- repair = true;
- } else {
+ } else if (!is_data_inode(inode)) {
struct extent_buffer *eb;
eb = find_extent_buffer_readpage(fs_info, page, start);
@@ -1162,19 +817,10 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio)
atomic_dec(&eb->io_pages);
}
- if (repair) {
- /*
- * submit_data_read_repair() will handle all the good
- * and bad sectors, we just continue to the next bvec.
- */
- submit_data_read_repair(inode, bbio, bio_offset, bvec,
- error_bitmap);
- } else {
- /* Update page status and unlock */
- end_page_read(page, uptodate, start, len);
- endio_readpage_release_extent(&processed, BTRFS_I(inode),
- start, end, PageUptodate(page));
- }
+ /* Update page status and unlock. */
+ end_page_read(page, uptodate, start, len);
+ endio_readpage_release_extent(&processed, BTRFS_I(inode),
+ start, end, PageUptodate(page));
ASSERT(bio_offset + len > bio_offset);
bio_offset += len;
@@ -1182,7 +828,6 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio)
}
/* Release the last extent */
endio_readpage_release_extent(&processed, NULL, 0, 0, false);
- btrfs_bio_free_csum(bbio);
bio_put(bio);
}
@@ -1250,11 +895,10 @@ static int btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl,
u32 real_size;
const sector_t sector = disk_bytenr >> SECTOR_SHIFT;
bool contig = false;
- int ret;
ASSERT(bio);
/* The limit should be calculated when bio_ctrl->bio is allocated */
- ASSERT(bio_ctrl->len_to_oe_boundary && bio_ctrl->len_to_stripe_boundary);
+ ASSERT(bio_ctrl->len_to_oe_boundary);
if (bio_ctrl->compress_type != compress_type)
return 0;
@@ -1290,9 +934,7 @@ static int btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl,
if (!contig)
return 0;
- real_size = min(bio_ctrl->len_to_oe_boundary,
- bio_ctrl->len_to_stripe_boundary) - bio_size;
- real_size = min(real_size, size);
+ real_size = min(bio_ctrl->len_to_oe_boundary - bio_size, size);
/*
* If real_size is 0, never call bio_add_*_page(), as even size is 0,
@@ -1301,82 +943,45 @@ static int btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl,
if (real_size == 0)
return 0;
- if (bio_op(bio) == REQ_OP_ZONE_APPEND)
- ret = bio_add_zone_append_page(bio, page, real_size, pg_offset);
- else
- ret = bio_add_page(bio, page, real_size, pg_offset);
-
- return ret;
+ return bio_add_page(bio, page, real_size, pg_offset);
}
-static int calc_bio_boundaries(struct btrfs_bio_ctrl *bio_ctrl,
- struct btrfs_inode *inode, u64 file_offset)
+static void calc_bio_boundaries(struct btrfs_bio_ctrl *bio_ctrl,
+ struct btrfs_inode *inode, u64 file_offset)
{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
- struct btrfs_io_geometry geom;
struct btrfs_ordered_extent *ordered;
- struct extent_map *em;
- u64 logical = (bio_ctrl->bio->bi_iter.bi_sector << SECTOR_SHIFT);
- int ret;
/*
- * Pages for compressed extent are never submitted to disk directly,
- * thus it has no real boundary, just set them to U32_MAX.
- *
- * The split happens for real compressed bio, which happens in
- * btrfs_submit_compressed_read/write().
+ * Limit the extent to the ordered boundary for Zone Append.
+ * Compressed bios aren't submitted directly, so it doesn't apply to
+ * them.
*/
- if (bio_ctrl->compress_type != BTRFS_COMPRESS_NONE) {
- bio_ctrl->len_to_oe_boundary = U32_MAX;
- bio_ctrl->len_to_stripe_boundary = U32_MAX;
- return 0;
- }
- em = btrfs_get_chunk_map(fs_info, logical, fs_info->sectorsize);
- if (IS_ERR(em))
- return PTR_ERR(em);
- ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(bio_ctrl->bio),
- logical, &geom);
- free_extent_map(em);
- if (ret < 0) {
- return ret;
- }
- if (geom.len > U32_MAX)
- bio_ctrl->len_to_stripe_boundary = U32_MAX;
- else
- bio_ctrl->len_to_stripe_boundary = (u32)geom.len;
-
- if (bio_op(bio_ctrl->bio) != REQ_OP_ZONE_APPEND) {
- bio_ctrl->len_to_oe_boundary = U32_MAX;
- return 0;
- }
-
- /* Ordered extent not yet created, so we're good */
- ordered = btrfs_lookup_ordered_extent(inode, file_offset);
- if (!ordered) {
- bio_ctrl->len_to_oe_boundary = U32_MAX;
- return 0;
+ if (bio_ctrl->compress_type == BTRFS_COMPRESS_NONE &&
+ btrfs_use_zone_append(btrfs_bio(bio_ctrl->bio))) {
+ ordered = btrfs_lookup_ordered_extent(inode, file_offset);
+ if (ordered) {
+ bio_ctrl->len_to_oe_boundary = min_t(u32, U32_MAX,
+ ordered->file_offset +
+ ordered->disk_num_bytes - file_offset);
+ btrfs_put_ordered_extent(ordered);
+ return;
+ }
}
- bio_ctrl->len_to_oe_boundary = min_t(u32, U32_MAX,
- ordered->disk_bytenr + ordered->disk_num_bytes - logical);
- btrfs_put_ordered_extent(ordered);
- return 0;
+ bio_ctrl->len_to_oe_boundary = U32_MAX;
}
-static int alloc_new_bio(struct btrfs_inode *inode,
- struct btrfs_bio_ctrl *bio_ctrl,
- struct writeback_control *wbc,
- blk_opf_t opf,
- u64 disk_bytenr, u32 offset, u64 file_offset,
- enum btrfs_compression_type compress_type)
+static void alloc_new_bio(struct btrfs_inode *inode,
+ struct btrfs_bio_ctrl *bio_ctrl,
+ struct writeback_control *wbc, blk_opf_t opf,
+ u64 disk_bytenr, u32 offset, u64 file_offset,
+ enum btrfs_compression_type compress_type)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct bio *bio;
- int ret;
- ASSERT(bio_ctrl->end_io_func);
-
- bio = btrfs_bio_alloc(BIO_MAX_VECS, opf, bio_ctrl->end_io_func, NULL);
+ bio = btrfs_bio_alloc(BIO_MAX_VECS, opf, inode, bio_ctrl->end_io_func,
+ NULL);
/*
* For compressed page range, its disk_bytenr is always @disk_bytenr
* passed in, no matter if we have added any range into previous bio.
@@ -1385,48 +990,21 @@ static int alloc_new_bio(struct btrfs_inode *inode,
bio->bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
else
bio->bi_iter.bi_sector = (disk_bytenr + offset) >> SECTOR_SHIFT;
+ btrfs_bio(bio)->file_offset = file_offset;
bio_ctrl->bio = bio;
bio_ctrl->compress_type = compress_type;
- ret = calc_bio_boundaries(bio_ctrl, inode, file_offset);
- if (ret < 0)
- goto error;
+ calc_bio_boundaries(bio_ctrl, inode, file_offset);
if (wbc) {
/*
- * For Zone append we need the correct block_device that we are
- * going to write to set in the bio to be able to respect the
- * hardware limitation. Look it up here:
+ * Pick the last added device to support cgroup writeback. For
+ * multi-device file systems this means blk-cgroup policies have
+ * to always be set on the last added/replaced device.
+ * This is a bit odd but has been like that for a long time.
*/
- if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
- struct btrfs_device *dev;
-
- dev = btrfs_zoned_get_device(fs_info, disk_bytenr,
- fs_info->sectorsize);
- if (IS_ERR(dev)) {
- ret = PTR_ERR(dev);
- goto error;
- }
-
- bio_set_dev(bio, dev->bdev);
- } else {
- /*
- * Otherwise pick the last added device to support
- * cgroup writeback. For multi-device file systems this
- * means blk-cgroup policies have to always be set on the
- * last added/replaced device. This is a bit odd but has
- * been like that for a long time.
- */
- bio_set_dev(bio, fs_info->fs_devices->latest_dev->bdev);
- }
+ bio_set_dev(bio, fs_info->fs_devices->latest_dev->bdev);
wbc_init_bio(wbc, bio);
- } else {
- ASSERT(bio_op(bio) != REQ_OP_ZONE_APPEND);
}
- return 0;
-error:
- bio_ctrl->bio = NULL;
- btrfs_bio_end_io(btrfs_bio(bio), errno_to_blk_status(ret));
- return ret;
}
/*
@@ -1452,7 +1030,6 @@ static int submit_extent_page(blk_opf_t opf,
enum btrfs_compression_type compress_type,
bool force_bio_submit)
{
- int ret = 0;
struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
unsigned int cur = pg_offset;
@@ -1472,12 +1049,9 @@ static int submit_extent_page(blk_opf_t opf,
/* Allocate new bio if needed */
if (!bio_ctrl->bio) {
- ret = alloc_new_bio(inode, bio_ctrl, wbc, opf,
- disk_bytenr, offset,
- page_offset(page) + cur,
- compress_type);
- if (ret < 0)
- return ret;
+ alloc_new_bio(inode, bio_ctrl, wbc, opf, disk_bytenr,
+ offset, page_offset(page) + cur,
+ compress_type);
}
/*
* We must go through btrfs_bio_add_page() to ensure each
@@ -2034,10 +1608,6 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
* find_next_dirty_byte() are all exclusive
*/
iosize = min(min(em_end, end + 1), dirty_range_end) - cur;
-
- if (btrfs_use_zone_append(inode, em->block_start))
- op = REQ_OP_ZONE_APPEND;
-
free_extent_map(em);
em = NULL;
@@ -2341,13 +1911,6 @@ static void set_btree_ioerr(struct page *page, struct extent_buffer *eb)
mapping_set_error(page->mapping, -EIO);
/*
- * If we error out, we should add back the dirty_metadata_bytes
- * to make it consistent.
- */
- percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
- eb->len, fs_info->dirty_metadata_batch);
-
- /*
* If writeback for a btree extent that doesn't belong to a log tree
* failed, increment the counter transaction->eb_write_errors.
* We do this because while the transaction is running and before it's
@@ -3806,6 +3369,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
lockend = round_up(start + len, inode->root->fs_info->sectorsize);
prev_extent_end = lockstart;
+ btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED);
lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end);
@@ -3999,6 +3563,7 @@ check_eof_delalloc:
out_unlock:
unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
+ btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
out:
free_extent_state(delalloc_cached_state);
btrfs_free_backref_share_ctx(backref_ctx);
@@ -4702,12 +4267,25 @@ static void clear_subpage_extent_buffer_dirty(const struct extent_buffer *eb)
WARN_ON(atomic_read(&eb->refs) == 0);
}
-void clear_extent_buffer_dirty(const struct extent_buffer *eb)
+void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans,
+ struct extent_buffer *eb)
{
+ struct btrfs_fs_info *fs_info = eb->fs_info;
int i;
int num_pages;
struct page *page;
+ btrfs_assert_tree_write_locked(eb);
+
+ if (trans && btrfs_header_generation(eb) != trans->transid)
+ return;
+
+ if (!test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags))
+ return;
+
+ percpu_counter_add_batch(&fs_info->dirty_metadata_bytes, -eb->len,
+ fs_info->dirty_metadata_batch);
+
if (eb->fs_info->nodesize < PAGE_SIZE)
return clear_subpage_extent_buffer_dirty(eb);
@@ -4829,6 +4407,7 @@ static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
struct extent_state *cached_state = NULL;
struct btrfs_bio_ctrl bio_ctrl = {
.mirror_num = mirror_num,
+ .parent_check = check,
};
int ret = 0;
@@ -4878,7 +4457,6 @@ static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
*/
atomic_dec(&eb->io_pages);
}
- memcpy(&btrfs_bio(bio_ctrl.bio)->parent_check, check, sizeof(*check));
submit_one_bio(&bio_ctrl);
if (ret || wait != WAIT_COMPLETE) {
free_extent_state(cached_state);
@@ -4905,6 +4483,7 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
unsigned long num_reads = 0;
struct btrfs_bio_ctrl bio_ctrl = {
.mirror_num = mirror_num,
+ .parent_check = check,
};
if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
@@ -4996,7 +4575,6 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
}
}
- memcpy(&btrfs_bio(bio_ctrl.bio)->parent_check, check, sizeof(*check));
submit_one_bio(&bio_ctrl);
if (ret || wait != WAIT_COMPLETE)
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index a2c82448b2e0..4341ad978fb8 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -11,6 +11,8 @@
#include "ulist.h"
#include "misc.h"
+struct btrfs_trans_handle;
+
enum {
EXTENT_BUFFER_UPTODATE,
EXTENT_BUFFER_DIRTY,
@@ -60,11 +62,9 @@ enum {
#define BITMAP_LAST_BYTE_MASK(nbits) \
(BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
-struct btrfs_bio;
struct btrfs_root;
struct btrfs_inode;
struct btrfs_fs_info;
-struct io_failure_record;
struct extent_io_tree;
struct btrfs_tree_parent_check;
@@ -262,7 +262,6 @@ void extent_buffer_bitmap_set(const struct extent_buffer *eb, unsigned long star
void extent_buffer_bitmap_clear(const struct extent_buffer *eb,
unsigned long start, unsigned long pos,
unsigned long len);
-void clear_extent_buffer_dirty(const struct extent_buffer *eb);
bool set_extent_buffer_dirty(struct extent_buffer *eb);
void set_extent_buffer_uptodate(struct extent_buffer *eb);
void clear_extent_buffer_uptodate(struct extent_buffer *eb);
@@ -274,40 +273,13 @@ void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
u32 bits_to_clear, unsigned long page_ops);
int extent_invalidate_folio(struct extent_io_tree *tree,
struct folio *folio, size_t offset);
+void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans,
+ struct extent_buffer *buf);
int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array);
void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
-/*
- * When IO fails, either with EIO or csum verification fails, we
- * try other mirrors that might have a good copy of the data. This
- * io_failure_record is used to record state as we go through all the
- * mirrors. If another mirror has good data, the sector is set up to date
- * and things continue. If a good mirror can't be found, the original
- * bio end_io callback is called to indicate things have failed.
- */
-struct io_failure_record {
- /* Use rb_simple_node for search/insert */
- struct {
- struct rb_node rb_node;
- u64 bytenr;
- };
- struct page *page;
- u64 len;
- u64 logical;
- int this_mirror;
- int failed_mirror;
- int num_copies;
-};
-
-int btrfs_repair_one_sector(struct btrfs_inode *inode, struct btrfs_bio *failed_bbio,
- u32 bio_offset, struct page *page, unsigned int pgoff,
- bool submit_buffered);
-void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end);
-int btrfs_clean_io_failure(struct btrfs_inode *inode, u64 start,
- struct page *page, unsigned int pg_offset);
-
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
bool find_lock_delalloc_range(struct inode *inode,
struct page *locked_page, u64 *start,
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 5de73466b2ca..41c77a100853 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -380,32 +380,25 @@ static int search_file_offset_in_bio(struct bio *bio, struct inode *inode,
/*
* Lookup the checksum for the read bio in csum tree.
*
- * @inode: inode that the bio is for.
- * @bio: bio to look up.
- * @dst: Buffer of size nblocks * btrfs_super_csum_size() used to return
- * checksum (nblocks = bio->bi_iter.bi_size / fs_info->sectorsize). If
- * NULL, the checksum buffer is allocated and returned in
- * btrfs_bio(bio)->csum instead.
- *
* Return: BLK_STS_RESOURCE if allocating memory fails, BLK_STS_OK otherwise.
*/
-blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst)
+blk_status_t btrfs_lookup_bio_sums(struct btrfs_bio *bbio)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
- struct btrfs_bio *bbio = NULL;
+ struct btrfs_inode *inode = bbio->inode;
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct extent_io_tree *io_tree = &inode->io_tree;
+ struct bio *bio = &bbio->bio;
struct btrfs_path *path;
const u32 sectorsize = fs_info->sectorsize;
const u32 csum_size = fs_info->csum_size;
u32 orig_len = bio->bi_iter.bi_size;
u64 orig_disk_bytenr = bio->bi_iter.bi_sector << SECTOR_SHIFT;
u64 cur_disk_bytenr;
- u8 *csum;
const unsigned int nblocks = orig_len >> fs_info->sectorsize_bits;
int count = 0;
blk_status_t ret = BLK_STS_OK;
- if ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) ||
+ if ((inode->flags & BTRFS_INODE_NODATASUM) ||
test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state))
return BLK_STS_OK;
@@ -426,21 +419,14 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
if (!path)
return BLK_STS_RESOURCE;
- if (!dst) {
- bbio = btrfs_bio(bio);
-
- if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
- bbio->csum = kmalloc_array(nblocks, csum_size, GFP_NOFS);
- if (!bbio->csum) {
- btrfs_free_path(path);
- return BLK_STS_RESOURCE;
- }
- } else {
- bbio->csum = bbio->csum_inline;
+ if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
+ bbio->csum = kmalloc_array(nblocks, csum_size, GFP_NOFS);
+ if (!bbio->csum) {
+ btrfs_free_path(path);
+ return BLK_STS_RESOURCE;
}
- csum = bbio->csum;
} else {
- csum = dst;
+ bbio->csum = bbio->csum_inline;
}
/*
@@ -456,7 +442,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
* read from the commit root and sidestep a nasty deadlock
* between reading the free space cache and updating the csum tree.
*/
- if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
+ if (btrfs_is_free_space_inode(inode)) {
path->search_commit_root = 1;
path->skip_locking = 1;
}
@@ -479,14 +465,15 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
ASSERT(cur_disk_bytenr - orig_disk_bytenr < UINT_MAX);
sector_offset = (cur_disk_bytenr - orig_disk_bytenr) >>
fs_info->sectorsize_bits;
- csum_dst = csum + sector_offset * csum_size;
+ csum_dst = bbio->csum + sector_offset * csum_size;
count = search_csum_tree(fs_info, path, cur_disk_bytenr,
search_len, csum_dst);
if (count < 0) {
ret = errno_to_blk_status(count);
- if (bbio)
- btrfs_bio_free_csum(bbio);
+ if (bbio->csum != bbio->csum_inline)
+ kfree(bbio->csum);
+ bbio->csum = NULL;
break;
}
@@ -504,12 +491,13 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
memset(csum_dst, 0, csum_size);
count = 1;
- if (BTRFS_I(inode)->root->root_key.objectid ==
+ if (inode->root->root_key.objectid ==
BTRFS_DATA_RELOC_TREE_OBJECTID) {
u64 file_offset;
int ret;
- ret = search_file_offset_in_bio(bio, inode,
+ ret = search_file_offset_in_bio(bio,
+ &inode->vfs_inode,
cur_disk_bytenr, &file_offset);
if (ret)
set_extent_bits(io_tree, file_offset,
@@ -784,23 +772,16 @@ fail:
/*
* Calculate checksums of the data contained inside a bio.
- *
- * @inode: Owner of the data inside the bio
- * @bio: Contains the data to be checksummed
- * @offset: If (u64)-1, @bio may contain discontiguous bio vecs, so the
- * file offsets are determined from the page offsets in the bio.
- * Otherwise, this is the starting file offset of the bio vecs in
- * @bio, which must be contiguous.
- * @one_ordered: If true, @bio only refers to one ordered extent.
*/
-blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
- u64 offset, bool one_ordered)
+blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio)
{
+ struct btrfs_inode *inode = bbio->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
+ struct bio *bio = &bbio->bio;
+ u64 offset = bbio->file_offset;
struct btrfs_ordered_sum *sums;
struct btrfs_ordered_extent *ordered = NULL;
- const bool use_page_offsets = (offset == (u64)-1);
char *data;
struct bvec_iter iter;
struct bio_vec bvec;
@@ -828,9 +809,6 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
shash->tfm = fs_info->csum_shash;
bio_for_each_segment(bvec, bio, iter) {
- if (use_page_offsets)
- offset = page_offset(bvec.bv_page) + bvec.bv_offset;
-
if (!ordered) {
ordered = btrfs_lookup_ordered_extent(inode, offset);
/*
@@ -852,7 +830,7 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
- 1);
for (i = 0; i < blockcount; i++) {
- if (!one_ordered &&
+ if (!(bio->bi_opf & REQ_BTRFS_ONE_ORDERED) &&
!in_range(offset, ordered->file_offset,
ordered->num_bytes)) {
unsigned long bytes_left;
diff --git a/fs/btrfs/file-item.h b/fs/btrfs/file-item.h
index 031225668434..cd7f2ae515c0 100644
--- a/fs/btrfs/file-item.h
+++ b/fs/btrfs/file-item.h
@@ -38,7 +38,7 @@ static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize)
int btrfs_del_csums(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr, u64 len);
-blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst);
+blk_status_t btrfs_lookup_bio_sums(struct btrfs_bio *bbio);
int btrfs_insert_hole_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 objectid, u64 pos,
u64 num_bytes);
@@ -49,8 +49,10 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_ordered_sum *sums);
-blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
- u64 offset, bool one_ordered);
+blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio);
+int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
+ struct list_head *list, int search_commit,
+ bool nowait);
int btrfs_lookup_csums_list(struct btrfs_root *root, u64 start, u64 end,
struct list_head *list, int search_commit,
bool nowait);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 91b00eb2440e..5cc5a1faaef5 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1017,7 +1017,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
unlock_page(pages[i]);
put_page(pages[i]);
}
- btrfs_start_ordered_extent(ordered, 1);
+ btrfs_start_ordered_extent(ordered);
btrfs_put_ordered_extent(ordered);
return -EAGAIN;
}
@@ -3354,7 +3354,7 @@ bool btrfs_find_delalloc_in_range(struct btrfs_inode *inode, u64 start, u64 end,
bool search_io_tree = true;
bool ret = false;
- while (cur_offset < end) {
+ while (cur_offset <= end) {
u64 delalloc_start;
u64 delalloc_end;
bool delalloc;
@@ -3541,6 +3541,7 @@ static loff_t find_desired_extent(struct file *file, loff_t offset, int whence)
struct extent_buffer *leaf = path->nodes[0];
struct btrfs_file_extent_item *extent;
u64 extent_end;
+ u8 type;
if (path->slots[0] >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(root, path);
@@ -3596,10 +3597,16 @@ static loff_t find_desired_extent(struct file *file, loff_t offset, int whence)
extent = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
+ type = btrfs_file_extent_type(leaf, extent);
- if (btrfs_file_extent_disk_bytenr(leaf, extent) == 0 ||
- btrfs_file_extent_type(leaf, extent) ==
- BTRFS_FILE_EXTENT_PREALLOC) {
+ /*
+ * Can't access the extent's disk_bytenr field if this is an
+ * inline extent, since at that offset, it's where the extent
+ * data starts.
+ */
+ if (type == BTRFS_FILE_EXTENT_PREALLOC ||
+ (type == BTRFS_FILE_EXTENT_REG &&
+ btrfs_file_extent_disk_bytenr(leaf, extent) == 0)) {
/*
* Explicit hole or prealloc extent, search for delalloc.
* A prealloc extent is treated like a hole.
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index c667e878ef1a..4d155a48ec59 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -1283,7 +1283,7 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
list_del(&free_space_root->dirty_list);
btrfs_tree_lock(free_space_root->node);
- btrfs_clean_tree_block(free_space_root->node);
+ btrfs_clear_buffer_dirty(trans, free_space_root->node);
btrfs_tree_unlock(free_space_root->node);
btrfs_free_tree_block(trans, btrfs_root_id(free_space_root),
free_space_root->node, 0, 1);
diff --git a/fs/btrfs/fs.c b/fs/btrfs/fs.c
index 5553e1f8afe8..31c1648bc0b4 100644
--- a/fs/btrfs/fs.c
+++ b/fs/btrfs/fs.c
@@ -24,6 +24,7 @@ void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag,
name, flag);
}
spin_unlock(&fs_info->super_lock);
+ set_bit(BTRFS_FS_FEATURE_CHANGED, &fs_info->flags);
}
}
@@ -46,6 +47,7 @@ void __btrfs_clear_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag,
name, flag);
}
spin_unlock(&fs_info->super_lock);
+ set_bit(BTRFS_FS_FEATURE_CHANGED, &fs_info->flags);
}
}
@@ -68,6 +70,7 @@ void __btrfs_set_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag,
name, flag);
}
spin_unlock(&fs_info->super_lock);
+ set_bit(BTRFS_FS_FEATURE_CHANGED, &fs_info->flags);
}
}
@@ -90,5 +93,6 @@ void __btrfs_clear_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag,
name, flag);
}
spin_unlock(&fs_info->super_lock);
+ set_bit(BTRFS_FS_FEATURE_CHANGED, &fs_info->flags);
}
}
diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
index a749367e5ae2..4c477eae6891 100644
--- a/fs/btrfs/fs.h
+++ b/fs/btrfs/fs.h
@@ -3,6 +3,7 @@
#ifndef BTRFS_FS_H
#define BTRFS_FS_H
+#include <linux/blkdev.h>
#include <linux/fs.h>
#include <linux/btrfs_tree.h>
#include <linux/sizes.h>
@@ -119,6 +120,18 @@ enum {
/* Indicate that we want to commit the transaction. */
BTRFS_FS_NEED_TRANS_COMMIT,
+ /*
+ * Indicate metadata over-commit is disabled. This is set when active
+ * zone tracking is needed.
+ */
+ BTRFS_FS_NO_OVERCOMMIT,
+
+ /*
+ * Indicate if we have some features changed, this is mostly for
+ * cleaner thread to update the sysfs interface.
+ */
+ BTRFS_FS_FEATURE_CHANGED,
+
#if BITS_PER_LONG == 32
/* Indicate if we have error/warn message printed on 32bit systems */
BTRFS_FS_32BIT_ERROR,
@@ -736,8 +749,10 @@ struct btrfs_fs_info {
*/
u64 zone_size;
- /* Max size to emit ZONE_APPEND write command */
+ /* Constraints for ZONE_APPEND commands: */
+ struct queue_limits limits;
u64 max_zone_append_size;
+
struct mutex zoned_meta_io_lock;
spinlock_t treelog_bg_lock;
u64 treelog_bg;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8bcad9940154..6c18dc9a1831 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -84,27 +84,12 @@ struct btrfs_dio_data {
};
struct btrfs_dio_private {
- struct btrfs_inode *inode;
-
- /*
- * Since DIO can use anonymous page, we cannot use page_offset() to
- * grab the file offset, thus need a dedicated member for file offset.
- */
+ /* Range of I/O */
u64 file_offset;
- /* Used for bio::bi_size */
u32 bytes;
- /*
- * References to this structure. There is one reference per in-flight
- * bio plus one while we're still setting up.
- */
- refcount_t refs;
-
- /* Array of checksums */
- u8 *csums;
-
/* This must be last */
- struct bio bio;
+ struct btrfs_bio bbio;
};
static struct bio_set btrfs_dio_bioset;
@@ -228,7 +213,7 @@ static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode,
{
unsigned long index = offset >> PAGE_SHIFT;
unsigned long end_index = (offset + bytes - 1) >> PAGE_SHIFT;
- u64 page_start, page_end;
+ u64 page_start = 0, page_end = 0;
struct page *page;
if (locked_page) {
@@ -2536,19 +2521,6 @@ void btrfs_clear_delalloc_extent(struct btrfs_inode *inode,
}
/*
- * in order to insert checksums into the metadata in large chunks,
- * we wait until bio submission time. All the pages in the bio are
- * checksummed and sums are attached onto the ordered extent record.
- *
- * At IO completion time the cums attached on the ordered extent record
- * are inserted into the btree
- */
-blk_status_t btrfs_submit_bio_start(struct btrfs_inode *inode, struct bio *bio)
-{
- return btrfs_csum_one_bio(inode, bio, (u64)-1, false);
-}
-
-/*
* Split an extent_map at [start, start + len]
*
* This function is intended to be used only for extract_ordered_extent().
@@ -2663,19 +2635,19 @@ out:
return ret;
}
-static blk_status_t extract_ordered_extent(struct btrfs_inode *inode,
- struct bio *bio, loff_t file_offset)
+blk_status_t btrfs_extract_ordered_extent(struct btrfs_bio *bbio)
{
+ u64 start = (u64)bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
+ u64 len = bbio->bio.bi_iter.bi_size;
+ struct btrfs_inode *inode = bbio->inode;
struct btrfs_ordered_extent *ordered;
- u64 start = (u64)bio->bi_iter.bi_sector << SECTOR_SHIFT;
u64 file_len;
- u64 len = bio->bi_iter.bi_size;
u64 end = start + len;
u64 ordered_end;
u64 pre, post;
int ret = 0;
- ordered = btrfs_lookup_ordered_extent(inode, file_offset);
+ ordered = btrfs_lookup_ordered_extent(inode, bbio->file_offset);
if (WARN_ON_ONCE(!ordered))
return BLK_STS_IOERR;
@@ -2715,7 +2687,7 @@ static blk_status_t extract_ordered_extent(struct btrfs_inode *inode,
ret = btrfs_split_ordered_extent(ordered, pre, post);
if (ret)
goto out;
- ret = split_zoned_em(inode, file_offset, file_len, pre, post);
+ ret = split_zoned_em(inode, bbio->file_offset, file_len, pre, post);
out:
btrfs_put_ordered_extent(ordered);
@@ -2723,75 +2695,6 @@ out:
return errno_to_blk_status(ret);
}
-void btrfs_submit_data_write_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num)
-{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
- blk_status_t ret;
-
- if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
- ret = extract_ordered_extent(inode, bio,
- page_offset(bio_first_bvec_all(bio)->bv_page));
- if (ret) {
- btrfs_bio_end_io(btrfs_bio(bio), ret);
- return;
- }
- }
-
- /*
- * If we need to checksum, and the I/O is not issued by fsync and
- * friends, that is ->sync_writers != 0, defer the submission to a
- * workqueue to parallelize it.
- *
- * Csum items for reloc roots have already been cloned at this point,
- * so they are handled as part of the no-checksum case.
- */
- if (!(inode->flags & BTRFS_INODE_NODATASUM) &&
- !test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state) &&
- !btrfs_is_data_reloc_root(inode->root)) {
- if (!atomic_read(&inode->sync_writers) &&
- btrfs_wq_submit_bio(inode, bio, mirror_num, 0, WQ_SUBMIT_DATA))
- return;
-
- ret = btrfs_csum_one_bio(inode, bio, (u64)-1, false);
- if (ret) {
- btrfs_bio_end_io(btrfs_bio(bio), ret);
- return;
- }
- }
- btrfs_submit_bio(fs_info, bio, mirror_num);
-}
-
-void btrfs_submit_data_read_bio(struct btrfs_inode *inode, struct bio *bio,
- int mirror_num, enum btrfs_compression_type compress_type)
-{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
- blk_status_t ret;
-
- if (compress_type != BTRFS_COMPRESS_NONE) {
- /*
- * btrfs_submit_compressed_read will handle completing the bio
- * if there were any errors, so just return here.
- */
- btrfs_submit_compressed_read(&inode->vfs_inode, bio, mirror_num);
- return;
- }
-
- /* Save the original iter for read repair */
- btrfs_bio(bio)->iter = bio->bi_iter;
-
- /*
- * Lookup bio sums does extra checks around whether we need to csum or
- * not, which is why we ignore skip_sum here.
- */
- ret = btrfs_lookup_bio_sums(&inode->vfs_inode, bio, NULL);
- if (ret) {
- btrfs_bio_end_io(btrfs_bio(bio), ret);
- return;
- }
-
- btrfs_submit_bio(fs_info, bio, mirror_num);
-}
-
/*
* given a list of ordered sums record them in the inode. This happens
* at IO completion time based on sums calculated at bio submission time.
@@ -2969,7 +2872,7 @@ again:
unlock_extent(&inode->io_tree, page_start, page_end,
&cached_state);
unlock_page(page);
- btrfs_start_ordered_extent(ordered, 1);
+ btrfs_start_ordered_extent(ordered);
btrfs_put_ordered_extent(ordered);
goto again;
}
@@ -3259,15 +3162,13 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
goto out;
}
- /* A valid bdev implies a write on a sequential zone */
- if (ordered_extent->bdev) {
+ /* A valid ->physical implies a write on a sequential zone. */
+ if (ordered_extent->physical != (u64)-1) {
btrfs_rewrite_logical_zoned(ordered_extent);
btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
ordered_extent->disk_num_bytes);
}
- btrfs_free_io_failure_record(inode, start, end);
-
if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
truncated = true;
logical_len = ordered_extent->truncated_len;
@@ -3474,109 +3375,55 @@ static u8 *btrfs_csum_ptr(const struct btrfs_fs_info *fs_info, u8 *csums, u64 of
}
/*
- * check_data_csum - verify checksum of one sector of uncompressed data
- * @inode: inode
- * @bbio: btrfs_bio which contains the csum
+ * Verify the checksum of a single data sector.
+ *
+ * @bbio: btrfs_io_bio which contains the csum
+ * @dev: device the sector is on
* @bio_offset: offset to the beginning of the bio (in bytes)
- * @page: page where is the data to be verified
- * @pgoff: offset inside the page
+ * @bv: bio_vec to check
*
- * The length of such check is always one sector size.
+ * Check if the checksum on a data block is valid. When a checksum mismatch is
+ * detected, report the error and fill the corrupted range with zero.
*
- * When csum mismatch is detected, we will also report the error and fill the
- * corrupted range with zero. (Thus it needs the extra parameters)
+ * Return %true if the sector is ok or had no checksum to start with, else %false.
*/
-int btrfs_check_data_csum(struct btrfs_inode *inode, struct btrfs_bio *bbio,
- u32 bio_offset, struct page *page, u32 pgoff)
+bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
+ u32 bio_offset, struct bio_vec *bv)
{
+ struct btrfs_inode *inode = bbio->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
- u32 len = fs_info->sectorsize;
+ u64 file_offset = bbio->file_offset + bio_offset;
+ u64 end = file_offset + bv->bv_len - 1;
u8 *csum_expected;
u8 csum[BTRFS_CSUM_SIZE];
- ASSERT(pgoff + len <= PAGE_SIZE);
+ ASSERT(bv->bv_len == fs_info->sectorsize);
- csum_expected = btrfs_csum_ptr(fs_info, bbio->csum, bio_offset);
+ if (!bbio->csum)
+ return true;
+
+ if (btrfs_is_data_reloc_root(inode->root) &&
+ test_range_bit(&inode->io_tree, file_offset, end, EXTENT_NODATASUM,
+ 1, NULL)) {
+ /* Skip the range without csum for data reloc inode */
+ clear_extent_bits(&inode->io_tree, file_offset, end,
+ EXTENT_NODATASUM);
+ return true;
+ }
- if (btrfs_check_sector_csum(fs_info, page, pgoff, csum, csum_expected))
+ csum_expected = btrfs_csum_ptr(fs_info, bbio->csum, bio_offset);
+ if (btrfs_check_sector_csum(fs_info, bv->bv_page, bv->bv_offset, csum,
+ csum_expected))
goto zeroit;
- return 0;
+ return true;
zeroit:
- btrfs_print_data_csum_error(inode, bbio->file_offset + bio_offset,
- csum, csum_expected, bbio->mirror_num);
- if (bbio->device)
- btrfs_dev_stat_inc_and_print(bbio->device,
- BTRFS_DEV_STAT_CORRUPTION_ERRS);
- memzero_page(page, pgoff, len);
- return -EIO;
-}
-
-/*
- * When reads are done, we need to check csums to verify the data is correct.
- * if there's a match, we allow the bio to finish. If not, the code in
- * extent_io.c will try to find good copies for us.
- *
- * @bio_offset: offset to the beginning of the bio (in bytes)
- * @start: file offset of the range start
- * @end: file offset of the range end (inclusive)
- *
- * Return a bitmap where bit set means a csum mismatch, and bit not set means
- * csum match.
- */
-unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio,
- u32 bio_offset, struct page *page,
- u64 start, u64 end)
-{
- struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
- struct btrfs_root *root = inode->root;
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct extent_io_tree *io_tree = &inode->io_tree;
- const u32 sectorsize = root->fs_info->sectorsize;
- u32 pg_off;
- unsigned int result = 0;
-
- /*
- * This only happens for NODATASUM or compressed read.
- * Normally this should be covered by above check for compressed read
- * or the next check for NODATASUM. Just do a quicker exit here.
- */
- if (bbio->csum == NULL)
- return 0;
-
- if (inode->flags & BTRFS_INODE_NODATASUM)
- return 0;
-
- if (unlikely(test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state)))
- return 0;
-
- ASSERT(page_offset(page) <= start &&
- end <= page_offset(page) + PAGE_SIZE - 1);
- for (pg_off = offset_in_page(start);
- pg_off < offset_in_page(end);
- pg_off += sectorsize, bio_offset += sectorsize) {
- u64 file_offset = pg_off + page_offset(page);
- int ret;
-
- if (btrfs_is_data_reloc_root(root) &&
- test_range_bit(io_tree, file_offset,
- file_offset + sectorsize - 1,
- EXTENT_NODATASUM, 1, NULL)) {
- /* Skip the range without csum for data reloc inode */
- clear_extent_bits(io_tree, file_offset,
- file_offset + sectorsize - 1,
- EXTENT_NODATASUM);
- continue;
- }
- ret = btrfs_check_data_csum(inode, bbio, bio_offset, page, pg_off);
- if (ret < 0) {
- const int nr_bit = (pg_off - offset_in_page(start)) >>
- root->fs_info->sectorsize_bits;
-
- result |= (1U << nr_bit);
- }
- }
- return result;
+ btrfs_print_data_csum_error(inode, file_offset, csum, csum_expected,
+ bbio->mirror_num);
+ if (dev)
+ btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS);
+ memzero_bvec(bv);
+ return false;
}
/*
@@ -4987,7 +4834,7 @@ again:
unlock_extent(io_tree, block_start, block_end, &cached_state);
unlock_page(page);
put_page(page);
- btrfs_start_ordered_extent(ordered, 1);
+ btrfs_start_ordered_extent(ordered);
btrfs_put_ordered_extent(ordered);
goto again;
}
@@ -5281,7 +5128,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
return ret;
}
-static int btrfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+static int btrfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
@@ -5291,7 +5138,7 @@ static int btrfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentr
if (btrfs_root_readonly(root))
return -EROFS;
- err = setattr_prepare(mnt_userns, dentry, attr);
+ err = setattr_prepare(idmap, dentry, attr);
if (err)
return err;
@@ -5302,12 +5149,12 @@ static int btrfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentr
}
if (attr->ia_valid) {
- setattr_copy(mnt_userns, inode, attr);
+ setattr_copy(idmap, inode, attr);
inode_inc_iversion(inode);
err = btrfs_dirty_inode(BTRFS_I(inode));
if (!err && attr->ia_valid & ATTR_MODE)
- err = posix_acl_chmod(mnt_userns, dentry, inode->i_mode);
+ err = posix_acl_chmod(idmap, dentry, inode->i_mode);
}
return err;
@@ -5466,8 +5313,6 @@ void btrfs_evict_inode(struct inode *inode)
if (is_bad_inode(inode))
goto no_delete;
- btrfs_free_io_failure_record(BTRFS_I(inode), 0, (u64)-1);
-
if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
goto no_delete;
@@ -6724,7 +6569,7 @@ out_inode:
return err;
}
-static int btrfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+static int btrfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct inode *inode;
@@ -6732,13 +6577,13 @@ static int btrfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
inode = new_inode(dir->i_sb);
if (!inode)
return -ENOMEM;
- inode_init_owner(mnt_userns, inode, dir, mode);
+ inode_init_owner(idmap, inode, dir, mode);
inode->i_op = &btrfs_special_inode_operations;
init_special_inode(inode, inode->i_mode, rdev);
return btrfs_create_common(dir, dentry, inode);
}
-static int btrfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int btrfs_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
struct inode *inode;
@@ -6746,7 +6591,7 @@ static int btrfs_create(struct user_namespace *mnt_userns, struct inode *dir,
inode = new_inode(dir->i_sb);
if (!inode)
return -ENOMEM;
- inode_init_owner(mnt_userns, inode, dir, mode);
+ inode_init_owner(idmap, inode, dir, mode);
inode->i_fop = &btrfs_file_operations;
inode->i_op = &btrfs_file_inode_operations;
inode->i_mapping->a_ops = &btrfs_aops;
@@ -6837,7 +6682,7 @@ fail:
return err;
}
-static int btrfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int btrfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
struct inode *inode;
@@ -6845,7 +6690,7 @@ static int btrfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
inode = new_inode(dir->i_sb);
if (!inode)
return -ENOMEM;
- inode_init_owner(mnt_userns, inode, dir, S_IFDIR | mode);
+ inode_init_owner(idmap, inode, dir, S_IFDIR | mode);
inode->i_op = &btrfs_dir_inode_operations;
inode->i_fop = &btrfs_dir_file_operations;
return btrfs_create_common(dir, dentry, inode);
@@ -7092,7 +6937,7 @@ next:
* Other members are not utilized for inline extents.
*/
ASSERT(em->block_start == EXTENT_MAP_INLINE);
- ASSERT(em->len = fs_info->sectorsize);
+ ASSERT(em->len == fs_info->sectorsize);
ret = read_inline_extent(inode, path, page);
if (ret < 0)
@@ -7392,7 +7237,7 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
*/
if (writing ||
test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags))
- btrfs_start_ordered_extent(ordered, 1);
+ btrfs_start_ordered_extent(ordered);
else
ret = nowait ? -EAGAIN : -ENOTBLK;
btrfs_put_ordered_extent(ordered);
@@ -7833,10 +7678,6 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
iomap->offset = start;
iomap->bdev = fs_info->fs_devices->latest_dev->bdev;
iomap->length = len;
-
- if (write && btrfs_use_zone_append(BTRFS_I(inode), em->block_start))
- iomap->flags |= IOMAP_F_ZONE_APPEND;
-
free_extent_map(em);
return 0;
@@ -7888,267 +7729,47 @@ static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length,
return ret;
}
-static void btrfs_dio_private_put(struct btrfs_dio_private *dip)
-{
- /*
- * This implies a barrier so that stores to dio_bio->bi_status before
- * this and loads of dio_bio->bi_status after this are fully ordered.
- */
- if (!refcount_dec_and_test(&dip->refs))
- return;
-
- if (btrfs_op(&dip->bio) == BTRFS_MAP_WRITE) {
- btrfs_mark_ordered_io_finished(dip->inode, NULL,
- dip->file_offset, dip->bytes,
- !dip->bio.bi_status);
- } else {
- unlock_extent(&dip->inode->io_tree,
- dip->file_offset,
- dip->file_offset + dip->bytes - 1, NULL);
- }
-
- kfree(dip->csums);
- bio_endio(&dip->bio);
-}
-
-void btrfs_submit_dio_repair_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num)
-{
- struct btrfs_dio_private *dip = btrfs_bio(bio)->private;
-
- BUG_ON(bio_op(bio) == REQ_OP_WRITE);
-
- refcount_inc(&dip->refs);
- btrfs_submit_bio(inode->root->fs_info, bio, mirror_num);
-}
-
-static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip,
- struct btrfs_bio *bbio,
- const bool uptodate)
-{
- struct inode *inode = &dip->inode->vfs_inode;
- struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
- const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM);
- blk_status_t err = BLK_STS_OK;
- struct bvec_iter iter;
- struct bio_vec bv;
- u32 offset;
-
- btrfs_bio_for_each_sector(fs_info, bv, bbio, iter, offset) {
- u64 start = bbio->file_offset + offset;
-
- if (uptodate &&
- (!csum || !btrfs_check_data_csum(BTRFS_I(inode), bbio, offset,
- bv.bv_page, bv.bv_offset))) {
- btrfs_clean_io_failure(BTRFS_I(inode), start,
- bv.bv_page, bv.bv_offset);
- } else {
- int ret;
-
- ret = btrfs_repair_one_sector(BTRFS_I(inode), bbio, offset,
- bv.bv_page, bv.bv_offset, false);
- if (ret)
- err = errno_to_blk_status(ret);
- }
- }
-
- return err;
-}
-
-blk_status_t btrfs_submit_bio_start_direct_io(struct btrfs_inode *inode,
- struct bio *bio,
- u64 dio_file_offset)
+static void btrfs_dio_end_io(struct btrfs_bio *bbio)
{
- return btrfs_csum_one_bio(inode, bio, dio_file_offset, false);
-}
-
-static void btrfs_end_dio_bio(struct btrfs_bio *bbio)
-{
- struct btrfs_dio_private *dip = bbio->private;
+ struct btrfs_dio_private *dip =
+ container_of(bbio, struct btrfs_dio_private, bbio);
+ struct btrfs_inode *inode = bbio->inode;
struct bio *bio = &bbio->bio;
- blk_status_t err = bio->bi_status;
-
- if (err)
- btrfs_warn(dip->inode->root->fs_info,
- "direct IO failed ino %llu rw %d,%u sector %#Lx len %u err no %d",
- btrfs_ino(dip->inode), bio_op(bio),
- bio->bi_opf, bio->bi_iter.bi_sector,
- bio->bi_iter.bi_size, err);
-
- if (bio_op(bio) == REQ_OP_READ)
- err = btrfs_check_read_dio_bio(dip, bbio, !err);
- if (err)
- dip->bio.bi_status = err;
-
- btrfs_record_physical_zoned(&dip->inode->vfs_inode, bbio->file_offset, bio);
-
- bio_put(bio);
- btrfs_dio_private_put(dip);
-}
-
-static void btrfs_submit_dio_bio(struct bio *bio, struct btrfs_inode *inode,
- u64 file_offset, int async_submit)
-{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
- struct btrfs_dio_private *dip = btrfs_bio(bio)->private;
- blk_status_t ret;
-
- /* Save the original iter for read repair */
- if (btrfs_op(bio) == BTRFS_MAP_READ)
- btrfs_bio(bio)->iter = bio->bi_iter;
-
- if (inode->flags & BTRFS_INODE_NODATASUM)
- goto map;
+ if (bio->bi_status) {
+ btrfs_warn(inode->root->fs_info,
+ "direct IO failed ino %llu op 0x%0x offset %#llx len %u err no %d",
+ btrfs_ino(inode), bio->bi_opf,
+ dip->file_offset, dip->bytes, bio->bi_status);
+ }
- if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
- /* Check btrfs_submit_data_write_bio() for async submit rules */
- if (async_submit && !atomic_read(&inode->sync_writers) &&
- btrfs_wq_submit_bio(inode, bio, 0, file_offset,
- WQ_SUBMIT_DATA_DIO))
- return;
+ if (btrfs_op(bio) == BTRFS_MAP_WRITE)
+ btrfs_mark_ordered_io_finished(inode, NULL, dip->file_offset,
+ dip->bytes, !bio->bi_status);
+ else
+ unlock_extent(&inode->io_tree, dip->file_offset,
+ dip->file_offset + dip->bytes - 1, NULL);
- /*
- * If we aren't doing async submit, calculate the csum of the
- * bio now.
- */
- ret = btrfs_csum_one_bio(inode, bio, file_offset, false);
- if (ret) {
- btrfs_bio_end_io(btrfs_bio(bio), ret);
- return;
- }
- } else {
- btrfs_bio(bio)->csum = btrfs_csum_ptr(fs_info, dip->csums,
- file_offset - dip->file_offset);
- }
-map:
- btrfs_submit_bio(fs_info, bio, 0);
+ bbio->bio.bi_private = bbio->private;
+ iomap_dio_bio_end_io(bio);
}
-static void btrfs_submit_direct(const struct iomap_iter *iter,
- struct bio *dio_bio, loff_t file_offset)
+static void btrfs_dio_submit_io(const struct iomap_iter *iter, struct bio *bio,
+ loff_t file_offset)
{
+ struct btrfs_bio *bbio = btrfs_bio(bio);
struct btrfs_dio_private *dip =
- container_of(dio_bio, struct btrfs_dio_private, bio);
- struct inode *inode = iter->inode;
- const bool write = (btrfs_op(dio_bio) == BTRFS_MAP_WRITE);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- const bool raid56 = (btrfs_data_alloc_profile(fs_info) &
- BTRFS_BLOCK_GROUP_RAID56_MASK);
- struct bio *bio;
- u64 start_sector;
- int async_submit = 0;
- u64 submit_len;
- u64 clone_offset = 0;
- u64 clone_len;
- u64 logical;
- int ret;
- blk_status_t status;
- struct btrfs_io_geometry geom;
+ container_of(bbio, struct btrfs_dio_private, bbio);
struct btrfs_dio_data *dio_data = iter->private;
- struct extent_map *em = NULL;
-
- dip->inode = BTRFS_I(inode);
- dip->file_offset = file_offset;
- dip->bytes = dio_bio->bi_iter.bi_size;
- refcount_set(&dip->refs, 1);
- dip->csums = NULL;
-
- if (!write && !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
- unsigned int nr_sectors =
- (dio_bio->bi_iter.bi_size >> fs_info->sectorsize_bits);
-
- /*
- * Load the csums up front to reduce csum tree searches and
- * contention when submitting bios.
- */
- status = BLK_STS_RESOURCE;
- dip->csums = kcalloc(nr_sectors, fs_info->csum_size, GFP_NOFS);
- if (!dip->csums)
- goto out_err;
-
- status = btrfs_lookup_bio_sums(inode, dio_bio, dip->csums);
- if (status != BLK_STS_OK)
- goto out_err;
- }
-
- start_sector = dio_bio->bi_iter.bi_sector;
- submit_len = dio_bio->bi_iter.bi_size;
-
- do {
- logical = start_sector << 9;
- em = btrfs_get_chunk_map(fs_info, logical, submit_len);
- if (IS_ERR(em)) {
- status = errno_to_blk_status(PTR_ERR(em));
- em = NULL;
- goto out_err_em;
- }
- ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(dio_bio),
- logical, &geom);
- if (ret) {
- status = errno_to_blk_status(ret);
- goto out_err_em;
- }
-
- clone_len = min(submit_len, geom.len);
- ASSERT(clone_len <= UINT_MAX);
-
- /*
- * This will never fail as it's passing GPF_NOFS and
- * the allocation is backed by btrfs_bioset.
- */
- bio = btrfs_bio_clone_partial(dio_bio, clone_offset, clone_len,
- btrfs_end_dio_bio, dip);
- btrfs_bio(bio)->file_offset = file_offset;
-
- if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
- status = extract_ordered_extent(BTRFS_I(inode), bio,
- file_offset);
- if (status) {
- bio_put(bio);
- goto out_err;
- }
- }
-
- ASSERT(submit_len >= clone_len);
- submit_len -= clone_len;
-
- /*
- * Increase the count before we submit the bio so we know
- * the end IO handler won't happen before we increase the
- * count. Otherwise, the dip might get freed before we're
- * done setting it up.
- *
- * We transfer the initial reference to the last bio, so we
- * don't need to increment the reference count for the last one.
- */
- if (submit_len > 0) {
- refcount_inc(&dip->refs);
- /*
- * If we are submitting more than one bio, submit them
- * all asynchronously. The exception is RAID 5 or 6, as
- * asynchronous checksums make it difficult to collect
- * full stripe writes.
- */
- if (!raid56)
- async_submit = 1;
- }
- btrfs_submit_dio_bio(bio, BTRFS_I(inode), file_offset, async_submit);
+ btrfs_bio_init(bbio, BTRFS_I(iter->inode), btrfs_dio_end_io, bio->bi_private);
+ bbio->file_offset = file_offset;
- dio_data->submitted += clone_len;
- clone_offset += clone_len;
- start_sector += clone_len >> 9;
- file_offset += clone_len;
-
- free_extent_map(em);
- } while (submit_len > 0);
- return;
+ dip->file_offset = file_offset;
+ dip->bytes = bio->bi_iter.bi_size;
-out_err_em:
- free_extent_map(em);
-out_err:
- dio_bio->bi_status = status;
- btrfs_dio_private_put(dip);
+ dio_data->submitted += bio->bi_iter.bi_size;
+ btrfs_submit_bio(bio, 0);
}
static const struct iomap_ops btrfs_dio_iomap_ops = {
@@ -8157,7 +7778,7 @@ static const struct iomap_ops btrfs_dio_iomap_ops = {
};
static const struct iomap_dio_ops btrfs_dio_ops = {
- .submit_io = btrfs_submit_direct,
+ .submit_io = btrfs_dio_submit_io,
.bio_set = &btrfs_dio_bioset,
};
@@ -8552,7 +8173,7 @@ again:
unlock_extent(io_tree, page_start, page_end, &cached_state);
unlock_page(page);
up_read(&BTRFS_I(inode)->i_mmap_lock);
- btrfs_start_ordered_extent(ordered, 1);
+ btrfs_start_ordered_extent(ordered);
btrfs_put_ordered_extent(ordered);
goto again;
}
@@ -8802,7 +8423,7 @@ out:
return ret;
}
-struct inode *btrfs_new_subvol_inode(struct user_namespace *mnt_userns,
+struct inode *btrfs_new_subvol_inode(struct mnt_idmap *idmap,
struct inode *dir)
{
struct inode *inode;
@@ -8813,7 +8434,7 @@ struct inode *btrfs_new_subvol_inode(struct user_namespace *mnt_userns,
* Subvolumes don't inherit the sgid bit or the parent's gid if
* the parent's sgid bit is set. This is probably a bug.
*/
- inode_init_owner(mnt_userns, inode, NULL,
+ inode_init_owner(idmap, inode, NULL,
S_IFDIR | (~current_umask() & S_IRWXUGO));
inode->i_op = &btrfs_dir_inode_operations;
inode->i_fop = &btrfs_dir_file_operations;
@@ -8850,7 +8471,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->last_log_commit = 0;
spin_lock_init(&ei->lock);
- spin_lock_init(&ei->io_failure_lock);
ei->outstanding_extents = 0;
if (sb->s_magic != BTRFS_TEST_MAGIC)
btrfs_init_metadata_block_rsv(fs_info, &ei->block_rsv,
@@ -8870,7 +8490,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->io_tree.inode = ei;
extent_io_tree_init(fs_info, &ei->file_extent_tree,
IO_TREE_INODE_FILE_EXTENT);
- ei->io_failure_tree = RB_ROOT;
atomic_set(&ei->sync_writers, 0);
mutex_init(&ei->log_mutex);
btrfs_ordered_inode_tree_init(&ei->ordered_tree);
@@ -8994,7 +8613,7 @@ int __init btrfs_init_cachep(void)
goto fail;
if (bioset_init(&btrfs_dio_bioset, BIO_POOL_SIZE,
- offsetof(struct btrfs_dio_private, bio),
+ offsetof(struct btrfs_dio_private, bbio.bio),
BIOSET_NEED_BVECS))
goto fail;
@@ -9004,7 +8623,7 @@ fail:
return -ENOMEM;
}
-static int btrfs_getattr(struct user_namespace *mnt_userns,
+static int btrfs_getattr(struct mnt_idmap *idmap,
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags)
{
@@ -9034,7 +8653,7 @@ static int btrfs_getattr(struct user_namespace *mnt_userns,
STATX_ATTR_IMMUTABLE |
STATX_ATTR_NODUMP);
- generic_fillattr(mnt_userns, inode, stat);
+ generic_fillattr(idmap, inode, stat);
stat->dev = BTRFS_I(inode)->root->anon_dev;
spin_lock(&BTRFS_I(inode)->lock);
@@ -9289,14 +8908,14 @@ out_notrans:
return ret;
}
-static struct inode *new_whiteout_inode(struct user_namespace *mnt_userns,
+static struct inode *new_whiteout_inode(struct mnt_idmap *idmap,
struct inode *dir)
{
struct inode *inode;
inode = new_inode(dir->i_sb);
if (inode) {
- inode_init_owner(mnt_userns, inode, dir,
+ inode_init_owner(idmap, inode, dir,
S_IFCHR | WHITEOUT_MODE);
inode->i_op = &btrfs_special_inode_operations;
init_special_inode(inode, inode->i_mode, WHITEOUT_DEV);
@@ -9304,7 +8923,7 @@ static struct inode *new_whiteout_inode(struct user_namespace *mnt_userns,
return inode;
}
-static int btrfs_rename(struct user_namespace *mnt_userns,
+static int btrfs_rename(struct mnt_idmap *idmap,
struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
@@ -9376,9 +8995,11 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
filemap_flush(old_inode->i_mapping);
if (flags & RENAME_WHITEOUT) {
- whiteout_args.inode = new_whiteout_inode(mnt_userns, old_dir);
- if (!whiteout_args.inode)
- return -ENOMEM;
+ whiteout_args.inode = new_whiteout_inode(idmap, old_dir);
+ if (!whiteout_args.inode) {
+ ret = -ENOMEM;
+ goto out_fscrypt_names;
+ }
ret = btrfs_new_inode_prepare(&whiteout_args, &trans_num_items);
if (ret)
goto out_whiteout_inode;
@@ -9543,7 +9164,7 @@ out_fscrypt_names:
return ret;
}
-static int btrfs_rename2(struct user_namespace *mnt_userns, struct inode *old_dir,
+static int btrfs_rename2(struct mnt_idmap *idmap, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags)
{
@@ -9556,7 +9177,7 @@ static int btrfs_rename2(struct user_namespace *mnt_userns, struct inode *old_di
ret = btrfs_rename_exchange(old_dir, old_dentry, new_dir,
new_dentry);
else
- ret = btrfs_rename(mnt_userns, old_dir, old_dentry, new_dir,
+ ret = btrfs_rename(idmap, old_dir, old_dentry, new_dir,
new_dentry, flags);
btrfs_btree_balance_dirty(BTRFS_I(new_dir)->root->fs_info);
@@ -9756,7 +9377,7 @@ out:
return ret;
}
-static int btrfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+static int btrfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *symname)
{
struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
@@ -9784,7 +9405,7 @@ static int btrfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
inode = new_inode(dir->i_sb);
if (!inode)
return -ENOMEM;
- inode_init_owner(mnt_userns, inode, dir, S_IFLNK | S_IRWXUGO);
+ inode_init_owner(idmap, inode, dir, S_IFLNK | S_IRWXUGO);
inode->i_op = &btrfs_symlink_inode_operations;
inode_nohighmem(inode);
inode->i_mapping->a_ops = &btrfs_aops;
@@ -10073,7 +9694,7 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
min_size, actual_len, alloc_hint, trans);
}
-static int btrfs_permission(struct user_namespace *mnt_userns,
+static int btrfs_permission(struct mnt_idmap *idmap,
struct inode *inode, int mask)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -10086,10 +9707,10 @@ static int btrfs_permission(struct user_namespace *mnt_userns,
if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY)
return -EACCES;
}
- return generic_permission(mnt_userns, inode, mask);
+ return generic_permission(idmap, inode, mask);
}
-static int btrfs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+static int btrfs_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
struct file *file, umode_t mode)
{
struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
@@ -10107,7 +9728,7 @@ static int btrfs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
inode = new_inode(dir->i_sb);
if (!inode)
return -ENOMEM;
- inode_init_owner(mnt_userns, inode, dir, mode);
+ inode_init_owner(idmap, inode, dir, mode);
inode->i_fop = &btrfs_file_operations;
inode->i_op = &btrfs_file_inode_operations;
inode->i_mapping->a_ops = &btrfs_aops;
@@ -10287,65 +9908,13 @@ struct btrfs_encoded_read_private {
wait_queue_head_t wait;
atomic_t pending;
blk_status_t status;
- bool skip_csum;
};
-static blk_status_t submit_encoded_read_bio(struct btrfs_inode *inode,
- struct bio *bio, int mirror_num)
-{
- struct btrfs_encoded_read_private *priv = btrfs_bio(bio)->private;
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
- blk_status_t ret;
-
- if (!priv->skip_csum) {
- ret = btrfs_lookup_bio_sums(&inode->vfs_inode, bio, NULL);
- if (ret)
- return ret;
- }
-
- atomic_inc(&priv->pending);
- btrfs_submit_bio(fs_info, bio, mirror_num);
- return BLK_STS_OK;
-}
-
-static blk_status_t btrfs_encoded_read_verify_csum(struct btrfs_bio *bbio)
-{
- const bool uptodate = (bbio->bio.bi_status == BLK_STS_OK);
- struct btrfs_encoded_read_private *priv = bbio->private;
- struct btrfs_inode *inode = priv->inode;
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
- u32 sectorsize = fs_info->sectorsize;
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
- u32 bio_offset = 0;
-
- if (priv->skip_csum || !uptodate)
- return bbio->bio.bi_status;
-
- bio_for_each_segment_all(bvec, &bbio->bio, iter_all) {
- unsigned int i, nr_sectors, pgoff;
-
- nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len);
- pgoff = bvec->bv_offset;
- for (i = 0; i < nr_sectors; i++) {
- ASSERT(pgoff < PAGE_SIZE);
- if (btrfs_check_data_csum(inode, bbio, bio_offset,
- bvec->bv_page, pgoff))
- return BLK_STS_IOERR;
- bio_offset += sectorsize;
- pgoff += sectorsize;
- }
- }
- return BLK_STS_OK;
-}
-
static void btrfs_encoded_read_endio(struct btrfs_bio *bbio)
{
struct btrfs_encoded_read_private *priv = bbio->private;
- blk_status_t status;
- status = btrfs_encoded_read_verify_csum(bbio);
- if (status) {
+ if (bbio->bio.bi_status) {
/*
* The memory barrier implied by the atomic_dec_return() here
* pairs with the memory barrier implied by the
@@ -10354,11 +9923,10 @@ static void btrfs_encoded_read_endio(struct btrfs_bio *bbio)
* write is observed before the load of status in
* btrfs_encoded_read_regular_fill_pages().
*/
- WRITE_ONCE(priv->status, status);
+ WRITE_ONCE(priv->status, bbio->bio.bi_status);
}
if (!atomic_dec_return(&priv->pending))
wake_up(&priv->wait);
- btrfs_bio_free_csum(bbio);
bio_put(&bbio->bio);
}
@@ -10366,47 +9934,26 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
u64 file_offset, u64 disk_bytenr,
u64 disk_io_size, struct page **pages)
{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_encoded_read_private priv = {
.inode = inode,
.file_offset = file_offset,
.pending = ATOMIC_INIT(1),
- .skip_csum = (inode->flags & BTRFS_INODE_NODATASUM),
};
unsigned long i = 0;
u64 cur = 0;
- int ret;
init_waitqueue_head(&priv.wait);
- /*
- * Submit bios for the extent, splitting due to bio or stripe limits as
- * necessary.
- */
+ /* Submit bios for the extent, splitting due to bio limits as necessary. */
while (cur < disk_io_size) {
- struct extent_map *em;
- struct btrfs_io_geometry geom;
struct bio *bio = NULL;
- u64 remaining;
+ u64 remaining = disk_io_size - cur;
- em = btrfs_get_chunk_map(fs_info, disk_bytenr + cur,
- disk_io_size - cur);
- if (IS_ERR(em)) {
- ret = PTR_ERR(em);
- } else {
- ret = btrfs_get_io_geometry(fs_info, em, BTRFS_MAP_READ,
- disk_bytenr + cur, &geom);
- free_extent_map(em);
- }
- if (ret) {
- WRITE_ONCE(priv.status, errno_to_blk_status(ret));
- break;
- }
- remaining = min(geom.len, disk_io_size - cur);
while (bio || remaining) {
size_t bytes = min_t(u64, remaining, PAGE_SIZE);
if (!bio) {
bio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ,
+ inode,
btrfs_encoded_read_endio,
&priv);
bio->bi_iter.bi_sector =
@@ -10415,14 +9962,8 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
if (!bytes ||
bio_add_page(bio, pages[i], bytes, 0) < bytes) {
- blk_status_t status;
-
- status = submit_encoded_read_bio(inode, bio, 0);
- if (status) {
- WRITE_ONCE(priv.status, status);
- bio_put(bio);
- goto out;
- }
+ atomic_inc(&priv.pending);
+ btrfs_submit_bio(bio, 0);
bio = NULL;
continue;
}
@@ -10433,7 +9974,6 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
}
}
-out:
if (atomic_dec_return(&priv.pending))
io_wait_event(priv.wait, !atomic_read(&priv.pending));
/* See btrfs_encoded_read_endio() for ordering. */
@@ -10993,9 +10533,8 @@ static int btrfs_add_swap_extent(struct swap_info_struct *sis,
return 0;
max_pages = sis->max - bsi->nr_pages;
- first_ppage = ALIGN(bsi->block_start, PAGE_SIZE) >> PAGE_SHIFT;
- next_ppage = ALIGN_DOWN(bsi->block_start + bsi->block_len,
- PAGE_SIZE) >> PAGE_SHIFT;
+ first_ppage = PAGE_ALIGN(bsi->block_start) >> PAGE_SHIFT;
+ next_ppage = PAGE_ALIGN_DOWN(bsi->block_start + bsi->block_len) >> PAGE_SHIFT;
if (first_ppage >= next_ppage)
return 0;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 7e348bd2ccde..84626c8ad5bf 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -243,7 +243,7 @@ int btrfs_fileattr_get(struct dentry *dentry, struct fileattr *fa)
return 0;
}
-int btrfs_fileattr_set(struct user_namespace *mnt_userns,
+int btrfs_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa)
{
struct inode *inode = d_inode(dentry);
@@ -578,7 +578,7 @@ static unsigned int create_subvol_num_items(struct btrfs_qgroup_inherit *inherit
return num_items;
}
-static noinline int create_subvol(struct user_namespace *mnt_userns,
+static noinline int create_subvol(struct mnt_idmap *idmap,
struct inode *dir, struct dentry *dentry,
struct btrfs_qgroup_inherit *inherit)
{
@@ -623,7 +623,7 @@ static noinline int create_subvol(struct user_namespace *mnt_userns,
if (ret < 0)
goto out_root_item;
- new_inode_args.inode = btrfs_new_subvol_inode(mnt_userns, dir);
+ new_inode_args.inode = btrfs_new_subvol_inode(idmap, dir);
if (!new_inode_args.inode) {
ret = -ENOMEM;
goto out_anon_dev;
@@ -707,7 +707,7 @@ static noinline int create_subvol(struct user_namespace *mnt_userns,
* exists).
*/
btrfs_tree_lock(leaf);
- btrfs_clean_tree_block(leaf);
+ btrfs_clear_buffer_dirty(trans, leaf);
btrfs_tree_unlock(leaf);
btrfs_free_tree_block(trans, objectid, leaf, 0, 1);
free_extent_buffer(leaf);
@@ -898,7 +898,7 @@ free_pending:
* nfs_async_unlink().
*/
-static int btrfs_may_delete(struct user_namespace *mnt_userns,
+static int btrfs_may_delete(struct mnt_idmap *idmap,
struct inode *dir, struct dentry *victim, int isdir)
{
int error;
@@ -909,12 +909,12 @@ static int btrfs_may_delete(struct user_namespace *mnt_userns,
BUG_ON(d_inode(victim->d_parent) != dir);
audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
- error = inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC);
+ error = inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC);
if (error)
return error;
if (IS_APPEND(dir))
return -EPERM;
- if (check_sticky(mnt_userns, dir, d_inode(victim)) ||
+ if (check_sticky(idmap, dir, d_inode(victim)) ||
IS_APPEND(d_inode(victim)) || IS_IMMUTABLE(d_inode(victim)) ||
IS_SWAPFILE(d_inode(victim)))
return -EPERM;
@@ -933,16 +933,16 @@ static int btrfs_may_delete(struct user_namespace *mnt_userns,
}
/* copy of may_create in fs/namei.c() */
-static inline int btrfs_may_create(struct user_namespace *mnt_userns,
+static inline int btrfs_may_create(struct mnt_idmap *idmap,
struct inode *dir, struct dentry *child)
{
if (d_really_is_positive(child))
return -EEXIST;
if (IS_DEADDIR(dir))
return -ENOENT;
- if (!fsuidgid_has_mapping(dir->i_sb, mnt_userns))
+ if (!fsuidgid_has_mapping(dir->i_sb, idmap))
return -EOVERFLOW;
- return inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC);
+ return inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC);
}
/*
@@ -951,7 +951,7 @@ static inline int btrfs_may_create(struct user_namespace *mnt_userns,
* inside this filesystem so it's quite a bit simpler.
*/
static noinline int btrfs_mksubvol(const struct path *parent,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
const char *name, int namelen,
struct btrfs_root *snap_src,
bool readonly,
@@ -967,12 +967,12 @@ static noinline int btrfs_mksubvol(const struct path *parent,
if (error == -EINTR)
return error;
- dentry = lookup_one(mnt_userns, name, parent->dentry, namelen);
+ dentry = lookup_one(idmap, name, parent->dentry, namelen);
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto out_unlock;
- error = btrfs_may_create(mnt_userns, dir, dentry);
+ error = btrfs_may_create(idmap, dir, dentry);
if (error)
goto out_dput;
@@ -993,7 +993,7 @@ static noinline int btrfs_mksubvol(const struct path *parent,
if (snap_src)
error = create_snapshot(snap_src, dir, dentry, readonly, inherit);
else
- error = create_subvol(mnt_userns, dir, dentry, inherit);
+ error = create_subvol(idmap, dir, dentry, inherit);
if (!error)
fsnotify_mkdir(dir, dentry);
@@ -1007,7 +1007,7 @@ out_unlock:
}
static noinline int btrfs_mksnapshot(const struct path *parent,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
const char *name, int namelen,
struct btrfs_root *root,
bool readonly,
@@ -1037,7 +1037,7 @@ static noinline int btrfs_mksnapshot(const struct path *parent,
btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
- ret = btrfs_mksubvol(parent, mnt_userns, name, namelen,
+ ret = btrfs_mksubvol(parent, idmap, name, namelen,
root, readonly, inherit);
out:
if (snapshot_force_cow)
@@ -1240,7 +1240,7 @@ out_drop:
}
static noinline int __btrfs_ioctl_snap_create(struct file *file,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
const char *name, unsigned long fd, int subvol,
bool readonly,
struct btrfs_qgroup_inherit *inherit)
@@ -1268,7 +1268,7 @@ static noinline int __btrfs_ioctl_snap_create(struct file *file,
}
if (subvol) {
- ret = btrfs_mksubvol(&file->f_path, mnt_userns, name,
+ ret = btrfs_mksubvol(&file->f_path, idmap, name,
namelen, NULL, readonly, inherit);
} else {
struct fd src = fdget(fd);
@@ -1283,14 +1283,14 @@ static noinline int __btrfs_ioctl_snap_create(struct file *file,
btrfs_info(BTRFS_I(file_inode(file))->root->fs_info,
"Snapshot src from another FS");
ret = -EXDEV;
- } else if (!inode_owner_or_capable(mnt_userns, src_inode)) {
+ } else if (!inode_owner_or_capable(idmap, src_inode)) {
/*
* Subvolume creation is not restricted, but snapshots
* are limited to own subvolumes only
*/
ret = -EPERM;
} else {
- ret = btrfs_mksnapshot(&file->f_path, mnt_userns,
+ ret = btrfs_mksnapshot(&file->f_path, idmap,
name, namelen,
BTRFS_I(src_inode)->root,
readonly, inherit);
@@ -1317,7 +1317,7 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
return PTR_ERR(vol_args);
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
- ret = __btrfs_ioctl_snap_create(file, file_mnt_user_ns(file),
+ ret = __btrfs_ioctl_snap_create(file, file_mnt_idmap(file),
vol_args->name, vol_args->fd, subvol,
false, NULL);
@@ -1377,7 +1377,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
}
}
- ret = __btrfs_ioctl_snap_create(file, file_mnt_user_ns(file),
+ ret = __btrfs_ioctl_snap_create(file, file_mnt_idmap(file),
vol_args->name, vol_args->fd, subvol,
readonly, inherit);
if (ret)
@@ -1422,7 +1422,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
u64 flags;
int ret = 0;
- if (!inode_owner_or_capable(file_mnt_user_ns(file), inode))
+ if (!inode_owner_or_capable(file_mnt_idmap(file), inode))
return -EPERM;
ret = mnt_want_write_file(file);
@@ -1870,7 +1870,7 @@ out:
return ret;
}
-static int btrfs_search_path_in_tree_user(struct user_namespace *mnt_userns,
+static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
struct inode *inode,
struct btrfs_ioctl_ino_lookup_user_args *args)
{
@@ -1962,7 +1962,7 @@ static int btrfs_search_path_in_tree_user(struct user_namespace *mnt_userns,
ret = PTR_ERR(temp_inode);
goto out_put;
}
- ret = inode_permission(mnt_userns, temp_inode,
+ ret = inode_permission(idmap, temp_inode,
MAY_READ | MAY_EXEC);
iput(temp_inode);
if (ret) {
@@ -2101,7 +2101,7 @@ static int btrfs_ioctl_ino_lookup_user(struct file *file, void __user *argp)
return -EACCES;
}
- ret = btrfs_search_path_in_tree_user(file_mnt_user_ns(file), inode, args);
+ ret = btrfs_search_path_in_tree_user(file_mnt_idmap(file), inode, args);
if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
ret = -EFAULT;
@@ -2335,7 +2335,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
struct btrfs_root *dest = NULL;
struct btrfs_ioctl_vol_args *vol_args = NULL;
struct btrfs_ioctl_vol_args_v2 *vol_args2 = NULL;
- struct user_namespace *mnt_userns = file_mnt_user_ns(file);
+ struct mnt_idmap *idmap = file_mnt_idmap(file);
char *subvol_name, *subvol_name_ptr = NULL;
int subvol_namelen;
int err = 0;
@@ -2428,7 +2428,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
* anywhere in the filesystem the user wouldn't be able
* to delete without an idmapped mount.
*/
- if (old_dir != dir && mnt_userns != &init_user_ns) {
+ if (old_dir != dir && idmap != &nop_mnt_idmap) {
err = -EOPNOTSUPP;
goto free_parent;
}
@@ -2471,7 +2471,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
err = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
if (err == -EINTR)
goto free_subvol_name;
- dentry = lookup_one(mnt_userns, subvol_name, parent, subvol_namelen);
+ dentry = lookup_one(idmap, subvol_name, parent, subvol_namelen);
if (IS_ERR(dentry)) {
err = PTR_ERR(dentry);
goto out_unlock_dir;
@@ -2513,13 +2513,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
if (root == dest)
goto out_dput;
- err = inode_permission(mnt_userns, inode, MAY_WRITE | MAY_EXEC);
+ err = inode_permission(idmap, inode, MAY_WRITE | MAY_EXEC);
if (err)
goto out_dput;
}
/* check if subvolume may be deleted by a user */
- err = btrfs_may_delete(mnt_userns, dir, dentry, 1);
+ err = btrfs_may_delete(idmap, dir, dentry, 1);
if (err)
goto out_dput;
@@ -2582,7 +2582,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
* running and allows defrag on files open in read-only mode.
*/
if (!capable(CAP_SYS_ADMIN) &&
- inode_permission(&init_user_ns, inode, MAY_WRITE)) {
+ inode_permission(&nop_mnt_idmap, inode, MAY_WRITE)) {
ret = -EPERM;
goto out;
}
@@ -3907,7 +3907,7 @@ static long btrfs_ioctl_quota_rescan_wait(struct btrfs_fs_info *fs_info,
}
static long _btrfs_ioctl_set_received_subvol(struct file *file,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct btrfs_ioctl_received_subvol_args *sa)
{
struct inode *inode = file_inode(file);
@@ -3919,7 +3919,7 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,
int ret = 0;
int received_uuid_changed;
- if (!inode_owner_or_capable(mnt_userns, inode))
+ if (!inode_owner_or_capable(idmap, inode))
return -EPERM;
ret = mnt_want_write_file(file);
@@ -4024,7 +4024,7 @@ static long btrfs_ioctl_set_received_subvol_32(struct file *file,
args64->rtime.nsec = args32->rtime.nsec;
args64->flags = args32->flags;
- ret = _btrfs_ioctl_set_received_subvol(file, file_mnt_user_ns(file), args64);
+ ret = _btrfs_ioctl_set_received_subvol(file, file_mnt_idmap(file), args64);
if (ret)
goto out;
@@ -4058,7 +4058,7 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,
if (IS_ERR(sa))
return PTR_ERR(sa);
- ret = _btrfs_ioctl_set_received_subvol(file, file_mnt_user_ns(file), sa);
+ ret = _btrfs_ioctl_set_received_subvol(file, file_mnt_idmap(file), sa);
if (ret)
goto out;
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 8a855d5ac2fa..d51b9a2f2f6e 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -6,7 +6,7 @@
long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
int btrfs_fileattr_get(struct dentry *dentry, struct fileattr *fa);
-int btrfs_fileattr_set(struct user_namespace *mnt_userns,
+int btrfs_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa);
int btrfs_ioctl_get_supported_features(void __user *arg);
void btrfs_sync_inode_flags_to_i_flags(struct inode *inode);
diff --git a/fs/btrfs/lru_cache.c b/fs/btrfs/lru_cache.c
new file mode 100644
index 000000000000..0fe0ae54ac67
--- /dev/null
+++ b/fs/btrfs/lru_cache.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/mm.h>
+#include "lru_cache.h"
+#include "messages.h"
+
+/*
+ * Initialize a cache object.
+ *
+ * @cache: The cache.
+ * @max_size: Maximum size (number of entries) for the cache.
+ * Use 0 for unlimited size, it's the user's responsability to
+ * trim the cache in that case.
+ */
+void btrfs_lru_cache_init(struct btrfs_lru_cache *cache, unsigned int max_size)
+{
+ INIT_LIST_HEAD(&cache->lru_list);
+ mt_init(&cache->entries);
+ cache->size = 0;
+ cache->max_size = max_size;
+}
+
+static struct btrfs_lru_cache_entry *match_entry(struct list_head *head, u64 key,
+ u64 gen)
+{
+ struct btrfs_lru_cache_entry *entry;
+
+ list_for_each_entry(entry, head, list) {
+ if (entry->key == key && entry->gen == gen)
+ return entry;
+ }
+
+ return NULL;
+}
+
+/*
+ * Lookup for an entry in the cache.
+ *
+ * @cache: The cache.
+ * @key: The key of the entry we are looking for.
+ * @gen: Generation associated to the key.
+ *
+ * Returns the entry associated with the key or NULL if none found.
+ */
+struct btrfs_lru_cache_entry *btrfs_lru_cache_lookup(struct btrfs_lru_cache *cache,
+ u64 key, u64 gen)
+{
+ struct list_head *head;
+ struct btrfs_lru_cache_entry *entry;
+
+ head = mtree_load(&cache->entries, key);
+ if (!head)
+ return NULL;
+
+ entry = match_entry(head, key, gen);
+ if (entry)
+ list_move_tail(&entry->lru_list, &cache->lru_list);
+
+ return entry;
+}
+
+/*
+ * Remove an entry from the cache.
+ *
+ * @cache: The cache to remove from.
+ * @entry: The entry to remove from the cache.
+ *
+ * Note: this also frees the memory used by the entry.
+ */
+void btrfs_lru_cache_remove(struct btrfs_lru_cache *cache,
+ struct btrfs_lru_cache_entry *entry)
+{
+ struct list_head *prev = entry->list.prev;
+
+ ASSERT(cache->size > 0);
+ ASSERT(!mtree_empty(&cache->entries));
+
+ list_del(&entry->list);
+ list_del(&entry->lru_list);
+
+ if (list_empty(prev)) {
+ struct list_head *head;
+
+ /*
+ * If previous element in the list entry->list is now empty, it
+ * means it's a head entry not pointing to any cached entries,
+ * so remove it from the maple tree and free it.
+ */
+ head = mtree_erase(&cache->entries, entry->key);
+ ASSERT(head == prev);
+ kfree(head);
+ }
+
+ kfree(entry);
+ cache->size--;
+}
+
+/*
+ * Store an entry in the cache.
+ *
+ * @cache: The cache.
+ * @entry: The entry to store.
+ *
+ * Returns 0 on success and < 0 on error.
+ */
+int btrfs_lru_cache_store(struct btrfs_lru_cache *cache,
+ struct btrfs_lru_cache_entry *new_entry,
+ gfp_t gfp)
+{
+ const u64 key = new_entry->key;
+ struct list_head *head;
+ int ret;
+
+ head = kmalloc(sizeof(*head), gfp);
+ if (!head)
+ return -ENOMEM;
+
+ ret = mtree_insert(&cache->entries, key, head, gfp);
+ if (ret == 0) {
+ INIT_LIST_HEAD(head);
+ list_add_tail(&new_entry->list, head);
+ } else if (ret == -EEXIST) {
+ kfree(head);
+ head = mtree_load(&cache->entries, key);
+ ASSERT(head != NULL);
+ if (match_entry(head, key, new_entry->gen) != NULL)
+ return -EEXIST;
+ list_add_tail(&new_entry->list, head);
+ } else if (ret < 0) {
+ kfree(head);
+ return ret;
+ }
+
+ if (cache->max_size > 0 && cache->size == cache->max_size) {
+ struct btrfs_lru_cache_entry *lru_entry;
+
+ lru_entry = list_first_entry(&cache->lru_list,
+ struct btrfs_lru_cache_entry,
+ lru_list);
+ btrfs_lru_cache_remove(cache, lru_entry);
+ }
+
+ list_add_tail(&new_entry->lru_list, &cache->lru_list);
+ cache->size++;
+
+ return 0;
+}
+
+/*
+ * Empty a cache.
+ *
+ * @cache: The cache to empty.
+ *
+ * Removes all entries from the cache.
+ */
+void btrfs_lru_cache_clear(struct btrfs_lru_cache *cache)
+{
+ struct btrfs_lru_cache_entry *entry;
+ struct btrfs_lru_cache_entry *tmp;
+
+ list_for_each_entry_safe(entry, tmp, &cache->lru_list, lru_list)
+ btrfs_lru_cache_remove(cache, entry);
+
+ ASSERT(cache->size == 0);
+ ASSERT(mtree_empty(&cache->entries));
+}
diff --git a/fs/btrfs/lru_cache.h b/fs/btrfs/lru_cache.h
new file mode 100644
index 000000000000..de3e18bce24a
--- /dev/null
+++ b/fs/btrfs/lru_cache.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef BTRFS_LRU_CACHE_H
+#define BTRFS_LRU_CACHE_H
+
+#include <linux/maple_tree.h>
+#include <linux/list.h>
+
+/*
+ * A cache entry. This is meant to be embedded in a structure of a user of
+ * this module. Similar to how struct list_head and struct rb_node are used.
+ *
+ * Note: it should be embedded as the first element in a struct (offset 0), and
+ * this module assumes it was allocated with kmalloc(), so it calls kfree() when
+ * it needs to free an entry.
+ */
+struct btrfs_lru_cache_entry {
+ struct list_head lru_list;
+ u64 key;
+ /*
+ * Optional generation associated to a key. Use 0 if not needed/used.
+ * Entries with the same key and different generations are stored in a
+ * linked list, so use this only for cases where there's a small number
+ * of different generations.
+ */
+ u64 gen;
+ /*
+ * The maple tree uses unsigned long type for the keys, which is 32 bits
+ * on 32 bits systems, and 64 bits on 64 bits systems. So if we want to
+ * use something like inode numbers as keys, which are always a u64, we
+ * have to deal with this in a special way - we store the key in the
+ * entry itself, as a u64, and the values inserted into the maple tree
+ * are linked lists of entries - so in case we are on a 64 bits system,
+ * that list always has a single entry, while on 32 bits systems it
+ * may have more than one, with each entry having the same value for
+ * their lower 32 bits of the u64 key.
+ */
+ struct list_head list;
+};
+
+struct btrfs_lru_cache {
+ struct list_head lru_list;
+ struct maple_tree entries;
+ /* Number of entries stored in the cache. */
+ unsigned int size;
+ /* Maximum number of entries the cache can have. */
+ unsigned int max_size;
+};
+
+#define btrfs_lru_cache_for_each_entry_safe(cache, entry, tmp) \
+ list_for_each_entry_safe_reverse((entry), (tmp), &(cache)->lru_list, lru_list)
+
+static inline unsigned int btrfs_lru_cache_size(const struct btrfs_lru_cache *cache)
+{
+ return cache->size;
+}
+
+static inline bool btrfs_lru_cache_is_full(const struct btrfs_lru_cache *cache)
+{
+ return cache->size >= cache->max_size;
+}
+
+static inline struct btrfs_lru_cache_entry *btrfs_lru_cache_lru_entry(
+ struct btrfs_lru_cache *cache)
+{
+ return list_first_entry_or_null(&cache->lru_list,
+ struct btrfs_lru_cache_entry, lru_list);
+}
+
+void btrfs_lru_cache_init(struct btrfs_lru_cache *cache, unsigned int max_size);
+struct btrfs_lru_cache_entry *btrfs_lru_cache_lookup(struct btrfs_lru_cache *cache,
+ u64 key, u64 gen);
+int btrfs_lru_cache_store(struct btrfs_lru_cache *cache,
+ struct btrfs_lru_cache_entry *new_entry,
+ gfp_t gfp);
+void btrfs_lru_cache_remove(struct btrfs_lru_cache *cache,
+ struct btrfs_lru_cache_entry *entry);
+void btrfs_lru_cache_clear(struct btrfs_lru_cache *cache);
+
+#endif
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index d5e78cbc8fbc..71f6d8302d50 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -280,7 +280,7 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping,
}
/* Check if we have reached page boundary */
- if (IS_ALIGNED(cur_in, PAGE_SIZE)) {
+ if (PAGE_ALIGNED(cur_in)) {
put_page(page_in);
page_in = NULL;
}
diff --git a/fs/btrfs/messages.c b/fs/btrfs/messages.c
index 625bbbbb2608..fde5aaa6e7c9 100644
--- a/fs/btrfs/messages.c
+++ b/fs/btrfs/messages.c
@@ -293,36 +293,6 @@ void __cold btrfs_err_32bit_limit(struct btrfs_fs_info *fs_info)
#endif
/*
- * We only mark the transaction aborted and then set the file system read-only.
- * This will prevent new transactions from starting or trying to join this
- * one.
- *
- * This means that error recovery at the call site is limited to freeing
- * any local memory allocations and passing the error code up without
- * further cleanup. The transaction should complete as it normally would
- * in the call path but will return -EIO.
- *
- * We'll complete the cleanup in btrfs_end_transaction and
- * btrfs_commit_transaction.
- */
-__cold
-void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
- const char *function,
- unsigned int line, int errno, bool first_hit)
-{
- struct btrfs_fs_info *fs_info = trans->fs_info;
-
- WRITE_ONCE(trans->aborted, errno);
- WRITE_ONCE(trans->transaction->aborted, errno);
- if (first_hit && errno == -ENOSPC)
- btrfs_dump_space_info_for_trans_abort(fs_info);
- /* Wake up anybody who may be waiting on this transaction */
- wake_up(&fs_info->transaction_wait);
- wake_up(&fs_info->transaction_blocked_wait);
- __btrfs_handle_fs_error(fs_info, function, line, errno, NULL);
-}
-
-/*
* __btrfs_panic decodes unexpected, fatal errors from the caller, issues an
* alert, and either panics or BUGs, depending on mount options.
*/
diff --git a/fs/btrfs/messages.h b/fs/btrfs/messages.h
index 190af1f698d9..8c516ee58ff9 100644
--- a/fs/btrfs/messages.h
+++ b/fs/btrfs/messages.h
@@ -6,7 +6,6 @@
#include <linux/types.h>
struct btrfs_fs_info;
-struct btrfs_trans_handle;
static inline __printf(2, 3) __cold
void btrfs_no_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
@@ -178,39 +177,6 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
const char * __attribute_const__ btrfs_decode_error(int errno);
-__cold
-void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
- const char *function,
- unsigned int line, int errno, bool first_hit);
-
-bool __cold abort_should_print_stack(int errno);
-
-/*
- * Call btrfs_abort_transaction as early as possible when an error condition is
- * detected, that way the exact stack trace is reported for some errors.
- */
-#define btrfs_abort_transaction(trans, errno) \
-do { \
- bool first = false; \
- /* Report first abort since mount */ \
- if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED, \
- &((trans)->fs_info->fs_state))) { \
- first = true; \
- if (WARN(abort_should_print_stack(errno), \
- KERN_ERR \
- "BTRFS: Transaction aborted (error %d)\n", \
- (errno))) { \
- /* Stack trace printed. */ \
- } else { \
- btrfs_err((trans)->fs_info, \
- "Transaction aborted (error %d)", \
- (errno)); \
- } \
- } \
- __btrfs_abort_transaction((trans), __func__, \
- __LINE__, (errno), first); \
-} while (0)
-
#define btrfs_handle_fs_error(fs_info, errno, fmt, args...) \
__btrfs_handle_fs_error((fs_info), __func__, __LINE__, \
(errno), fmt, ##args)
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 57d8c72737e1..6c24b69e2d0a 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -616,7 +616,7 @@ static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
struct btrfs_ordered_extent *ordered;
ordered = container_of(work, struct btrfs_ordered_extent, flush_work);
- btrfs_start_ordered_extent(ordered, 1);
+ btrfs_start_ordered_extent(ordered);
complete(&ordered->completion);
}
@@ -716,13 +716,12 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
}
/*
- * Used to start IO or wait for a given ordered extent to finish.
+ * Start IO and wait for a given ordered extent to finish.
*
- * If wait is one, this effectively waits on page writeback for all the pages
- * in the extent, and it waits on the io completion code to insert
- * metadata into the btree corresponding to the extent
+ * Wait on page writeback for all the pages in the extent and the IO completion
+ * code to insert metadata into the btree corresponding to the extent.
*/
-void btrfs_start_ordered_extent(struct btrfs_ordered_extent *entry, int wait)
+void btrfs_start_ordered_extent(struct btrfs_ordered_extent *entry)
{
u64 start = entry->file_offset;
u64 end = start + entry->num_bytes - 1;
@@ -744,12 +743,10 @@ void btrfs_start_ordered_extent(struct btrfs_ordered_extent *entry, int wait)
*/
if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags))
filemap_fdatawrite_range(inode->vfs_inode.i_mapping, start, end);
- if (wait) {
- if (!freespace_inode)
- btrfs_might_wait_for_event(inode->root->fs_info, btrfs_ordered_extent);
- wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE,
- &entry->flags));
- }
+
+ if (!freespace_inode)
+ btrfs_might_wait_for_event(inode->root->fs_info, btrfs_ordered_extent);
+ wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, &entry->flags));
}
/*
@@ -800,7 +797,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
btrfs_put_ordered_extent(ordered);
break;
}
- btrfs_start_ordered_extent(ordered, 1);
+ btrfs_start_ordered_extent(ordered);
end = ordered->file_offset;
/*
* If the ordered extent had an error save the error but don't
@@ -1061,7 +1058,7 @@ void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start,
break;
}
unlock_extent(&inode->io_tree, start, end, cachedp);
- btrfs_start_ordered_extent(ordered, 1);
+ btrfs_start_ordered_extent(ordered);
btrfs_put_ordered_extent(ordered);
}
}
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 89f82b78f590..eb40cb39f842 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -157,7 +157,6 @@ struct btrfs_ordered_extent {
* command in a workqueue context
*/
u64 physical;
- struct block_device *bdev;
};
static inline void
@@ -187,7 +186,7 @@ void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
struct btrfs_ordered_sum *sum);
struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *inode,
u64 file_offset);
-void btrfs_start_ordered_extent(struct btrfs_ordered_extent *entry, int wait);
+void btrfs_start_ordered_extent(struct btrfs_ordered_extent *entry);
int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
struct btrfs_ordered_extent *
btrfs_lookup_first_ordered_extent(struct btrfs_inode *inode, u64 file_offset);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 5c636e00d77d..52a7d2fa2284 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1304,7 +1304,7 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
list_del(&quota_root->dirty_list);
btrfs_tree_lock(quota_root->node);
- btrfs_clean_tree_block(quota_root->node);
+ btrfs_clear_buffer_dirty(trans, quota_root->node);
btrfs_tree_unlock(quota_root->node);
btrfs_free_tree_block(trans, btrfs_root_id(quota_root),
quota_root->node, 0, 1);
@@ -2765,9 +2765,19 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
/*
* Old roots should be searched when inserting qgroup
- * extent record
+ * extent record.
+ *
+ * But for INCONSISTENT (NO_ACCOUNTING) -> rescan case,
+ * we may have some record inserted during
+ * NO_ACCOUNTING (thus no old_roots populated), but
+ * later we start rescan, which clears NO_ACCOUNTING,
+ * leaving some inserted records without old_roots
+ * populated.
+ *
+ * Those cases are rare and should not cause too much
+ * time spent during commit_transaction().
*/
- if (WARN_ON(!record->old_roots)) {
+ if (!record->old_roots) {
/* Search commit root to find old_roots */
ret = btrfs_find_all_roots(&ctx, false);
if (ret < 0)
@@ -2787,6 +2797,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
* current root. It's safe inside commit_transaction().
*/
ctx.trans = trans;
+ ctx.time_seq = BTRFS_SEQ_LAST;
ret = btrfs_find_all_roots(&ctx, false);
if (ret < 0)
goto cleanup;
@@ -3356,6 +3367,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
int err = -ENOMEM;
int ret = 0;
bool stopped = false;
+ bool did_leaf_rescans = false;
path = btrfs_alloc_path();
if (!path)
@@ -3376,6 +3388,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
}
err = qgroup_rescan_leaf(trans, path);
+ did_leaf_rescans = true;
if (err > 0)
btrfs_commit_transaction(trans);
@@ -3396,16 +3409,23 @@ out:
mutex_unlock(&fs_info->qgroup_rescan_lock);
/*
- * only update status, since the previous part has already updated the
- * qgroup info.
+ * Only update status, since the previous part has already updated the
+ * qgroup info, and only if we did any actual work. This also prevents
+ * race with a concurrent quota disable, which has already set
+ * fs_info->quota_root to NULL and cleared BTRFS_FS_QUOTA_ENABLED at
+ * btrfs_quota_disable().
*/
- trans = btrfs_start_transaction(fs_info->quota_root, 1);
- if (IS_ERR(trans)) {
- err = PTR_ERR(trans);
+ if (did_leaf_rescans) {
+ trans = btrfs_start_transaction(fs_info->quota_root, 1);
+ if (IS_ERR(trans)) {
+ err = PTR_ERR(trans);
+ trans = NULL;
+ btrfs_err(fs_info,
+ "fail to start transaction for status update: %d",
+ err);
+ }
+ } else {
trans = NULL;
- btrfs_err(fs_info,
- "fail to start transaction for status update: %d",
- err);
}
mutex_lock(&fs_info->qgroup_rescan_lock);
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 2d90a6b5eb00..642828c1b299 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -998,7 +998,7 @@ static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
}
/*
- * Return the total numer of errors found in the vertical stripe of @sector_nr.
+ * Return the total number of errors found in the vertical stripe of @sector_nr.
*
* @faila and @failb will also be updated to the first and second stripe
* number of the errors.
@@ -1183,7 +1183,15 @@ not_found:
trace_info->stripe_nr = -1;
}
-/* Generate PQ for one veritical stripe. */
+static inline void bio_list_put(struct bio_list *bio_list)
+{
+ struct bio *bio;
+
+ while ((bio = bio_list_pop(bio_list)))
+ bio_put(bio);
+}
+
+/* Generate PQ for one vertical stripe. */
static void generate_pq_vertical(struct btrfs_raid_bio *rbio, int sectornr)
{
void **pointers = rbio->finish_pointers;
@@ -1228,7 +1236,6 @@ static void generate_pq_vertical(struct btrfs_raid_bio *rbio, int sectornr)
static int rmw_assemble_write_bios(struct btrfs_raid_bio *rbio,
struct bio_list *bio_list)
{
- struct bio *bio;
/* The total sector number inside the full stripe. */
int total_sector_nr;
int sectornr;
@@ -1317,8 +1324,7 @@ static int rmw_assemble_write_bios(struct btrfs_raid_bio *rbio,
return 0;
error:
- while ((bio = bio_list_pop(bio_list)))
- bio_put(bio);
+ bio_list_put(bio_list);
return -EIO;
}
@@ -1357,7 +1363,7 @@ static void set_rbio_range_error(struct btrfs_raid_bio *rbio, struct bio *bio)
}
/*
- * For subpage case, we can no longer set page Uptodate directly for
+ * For subpage case, we can no longer set page Up-to-date directly for
* stripe_pages[], thus we need to locate the sector.
*/
static struct sector_ptr *find_stripe_sector(struct btrfs_raid_bio *rbio,
@@ -1425,13 +1431,20 @@ static void rbio_update_error_bitmap(struct btrfs_raid_bio *rbio, struct bio *bi
int total_sector_nr = get_bio_sector_nr(rbio, bio);
u32 bio_size = 0;
struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
+ int i;
- bio_for_each_segment_all(bvec, bio, iter_all)
+ bio_for_each_bvec_all(bvec, bio, i)
bio_size += bvec->bv_len;
- bitmap_set(rbio->error_bitmap, total_sector_nr,
- bio_size >> rbio->bioc->fs_info->sectorsize_bits);
+ /*
+ * Since we can have multiple bios touching the error_bitmap, we cannot
+ * call bitmap_set() without protection.
+ *
+ * Instead use set_bit() for each bit, as set_bit() itself is atomic.
+ */
+ for (i = total_sector_nr; i < total_sector_nr +
+ (bio_size >> rbio->bioc->fs_info->sectorsize_bits); i++)
+ set_bit(i, rbio->error_bitmap);
}
/* Verify the data sectors at read time. */
@@ -1490,7 +1503,7 @@ static void raid_wait_read_end_io(struct bio *bio)
wake_up(&rbio->io_wait);
}
-static void submit_read_bios(struct btrfs_raid_bio *rbio,
+static void submit_read_wait_bio_list(struct btrfs_raid_bio *rbio,
struct bio_list *bio_list)
{
struct bio *bio;
@@ -1507,41 +1520,8 @@ static void submit_read_bios(struct btrfs_raid_bio *rbio,
}
submit_bio(bio);
}
-}
-
-static int rmw_assemble_read_bios(struct btrfs_raid_bio *rbio,
- struct bio_list *bio_list)
-{
- struct bio *bio;
- int total_sector_nr;
- int ret = 0;
-
- ASSERT(bio_list_size(bio_list) == 0);
-
- /*
- * Build a list of bios to read all sectors (including data and P/Q).
- *
- * This behaviro is to compensate the later csum verification and
- * recovery.
- */
- for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors;
- total_sector_nr++) {
- struct sector_ptr *sector;
- int stripe = total_sector_nr / rbio->stripe_nsectors;
- int sectornr = total_sector_nr % rbio->stripe_nsectors;
-
- sector = rbio_stripe_sector(rbio, stripe, sectornr);
- ret = rbio_add_io_sector(rbio, bio_list, sector,
- stripe, sectornr, REQ_OP_READ);
- if (ret)
- goto cleanup;
- }
- return 0;
-cleanup:
- while ((bio = bio_list_pop(bio_list)))
- bio_put(bio);
- return ret;
+ wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0);
}
static int alloc_rbio_data_pages(struct btrfs_raid_bio *rbio)
@@ -1660,12 +1640,12 @@ void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
struct btrfs_raid_bio *rbio;
struct btrfs_plug_cb *plug = NULL;
struct blk_plug_cb *cb;
- int ret = 0;
rbio = alloc_rbio(fs_info, bioc);
if (IS_ERR(rbio)) {
- ret = PTR_ERR(rbio);
- goto fail;
+ bio->bi_status = errno_to_blk_status(PTR_ERR(rbio));
+ bio_endio(bio);
+ return;
}
rbio->operation = BTRFS_RBIO_WRITE;
rbio_add_bio(rbio, bio);
@@ -1674,31 +1654,24 @@ void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
* Don't plug on full rbios, just get them out the door
* as quickly as we can
*/
- if (rbio_is_full(rbio))
- goto queue_rbio;
-
- cb = blk_check_plugged(raid_unplug, fs_info, sizeof(*plug));
- if (cb) {
- plug = container_of(cb, struct btrfs_plug_cb, cb);
- if (!plug->info) {
- plug->info = fs_info;
- INIT_LIST_HEAD(&plug->rbio_list);
+ if (!rbio_is_full(rbio)) {
+ cb = blk_check_plugged(raid_unplug, fs_info, sizeof(*plug));
+ if (cb) {
+ plug = container_of(cb, struct btrfs_plug_cb, cb);
+ if (!plug->info) {
+ plug->info = fs_info;
+ INIT_LIST_HEAD(&plug->rbio_list);
+ }
+ list_add_tail(&rbio->plug_list, &plug->rbio_list);
+ return;
}
- list_add_tail(&rbio->plug_list, &plug->rbio_list);
- return;
}
-queue_rbio:
+
/*
* Either we don't have any existing plug, or we're doing a full stripe,
- * can queue the rmw work now.
+ * queue the rmw work now.
*/
start_async_work(rbio, rmw_rbio_work);
-
- return;
-
-fail:
- bio->bi_status = errno_to_blk_status(ret);
- bio_endio(bio);
}
static int verify_one_sector(struct btrfs_raid_bio *rbio,
@@ -1765,7 +1738,7 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
found_errors = get_rbio_veritical_errors(rbio, sector_nr, &faila,
&failb);
/*
- * No errors in the veritical stripe, skip it. Can happen for recovery
+ * No errors in the vertical stripe, skip it. Can happen for recovery
* which only part of a stripe failed csum check.
*/
if (!found_errors)
@@ -1886,7 +1859,7 @@ pstripe:
sector->uptodate = 1;
}
if (failb >= 0) {
- ret = verify_one_sector(rbio, faila, sector_nr);
+ ret = verify_one_sector(rbio, failb, sector_nr);
if (ret < 0)
goto cleanup;
@@ -1941,14 +1914,25 @@ out:
return ret;
}
-static int recover_assemble_read_bios(struct btrfs_raid_bio *rbio,
- struct bio_list *bio_list)
+static void recover_rbio(struct btrfs_raid_bio *rbio)
{
- struct bio *bio;
+ struct bio_list bio_list = BIO_EMPTY_LIST;
int total_sector_nr;
int ret = 0;
- ASSERT(bio_list_size(bio_list) == 0);
+ /*
+ * Either we're doing recover for a read failure or degraded write,
+ * caller should have set error bitmap correctly.
+ */
+ ASSERT(bitmap_weight(rbio->error_bitmap, rbio->nr_sectors));
+
+ /* For recovery, we need to read all sectors including P/Q. */
+ ret = alloc_rbio_pages(rbio);
+ if (ret < 0)
+ goto out;
+
+ index_rbio_pages(rbio);
+
/*
* Read everything that hasn't failed. However this time we will
* not trust any cached sector.
@@ -1979,78 +1963,32 @@ static int recover_assemble_read_bios(struct btrfs_raid_bio *rbio,
}
sector = rbio_stripe_sector(rbio, stripe, sectornr);
- ret = rbio_add_io_sector(rbio, bio_list, sector, stripe,
+ ret = rbio_add_io_sector(rbio, &bio_list, sector, stripe,
sectornr, REQ_OP_READ);
- if (ret < 0)
- goto error;
+ if (ret < 0) {
+ bio_list_put(&bio_list);
+ goto out;
+ }
}
- return 0;
-error:
- while ((bio = bio_list_pop(bio_list)))
- bio_put(bio);
-
- return -EIO;
-}
-
-static int recover_rbio(struct btrfs_raid_bio *rbio)
-{
- struct bio_list bio_list;
- struct bio *bio;
- int ret;
-
- /*
- * Either we're doing recover for a read failure or degraded write,
- * caller should have set error bitmap correctly.
- */
- ASSERT(bitmap_weight(rbio->error_bitmap, rbio->nr_sectors));
- bio_list_init(&bio_list);
-
- /* For recovery, we need to read all sectors including P/Q. */
- ret = alloc_rbio_pages(rbio);
- if (ret < 0)
- goto out;
-
- index_rbio_pages(rbio);
-
- ret = recover_assemble_read_bios(rbio, &bio_list);
- if (ret < 0)
- goto out;
-
- submit_read_bios(rbio, &bio_list);
- wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0);
+ submit_read_wait_bio_list(rbio, &bio_list);
ret = recover_sectors(rbio);
-
out:
- while ((bio = bio_list_pop(&bio_list)))
- bio_put(bio);
-
- return ret;
+ rbio_orig_end_io(rbio, errno_to_blk_status(ret));
}
static void recover_rbio_work(struct work_struct *work)
{
struct btrfs_raid_bio *rbio;
- int ret;
rbio = container_of(work, struct btrfs_raid_bio, work);
-
- ret = lock_stripe_add(rbio);
- if (ret == 0) {
- ret = recover_rbio(rbio);
- rbio_orig_end_io(rbio, errno_to_blk_status(ret));
- }
+ if (!lock_stripe_add(rbio))
+ recover_rbio(rbio);
}
static void recover_rbio_work_locked(struct work_struct *work)
{
- struct btrfs_raid_bio *rbio;
- int ret;
-
- rbio = container_of(work, struct btrfs_raid_bio, work);
-
- ret = recover_rbio(rbio);
- rbio_orig_end_io(rbio, errno_to_blk_status(ret));
+ recover_rbio(container_of(work, struct btrfs_raid_bio, work));
}
static void set_rbio_raid6_extra_error(struct btrfs_raid_bio *rbio, int mirror_num)
@@ -2196,11 +2134,9 @@ no_csum:
static int rmw_read_wait_recover(struct btrfs_raid_bio *rbio)
{
- struct bio_list bio_list;
- struct bio *bio;
- int ret;
-
- bio_list_init(&bio_list);
+ struct bio_list bio_list = BIO_EMPTY_LIST;
+ int total_sector_nr;
+ int ret = 0;
/*
* Fill the data csums we need for data verification. We need to fill
@@ -2209,24 +2145,32 @@ static int rmw_read_wait_recover(struct btrfs_raid_bio *rbio)
*/
fill_data_csums(rbio);
- ret = rmw_assemble_read_bios(rbio, &bio_list);
- if (ret < 0)
- goto out;
+ /*
+ * Build a list of bios to read all sectors (including data and P/Q).
+ *
+ * This behavior is to compensate the later csum verification and recovery.
+ */
+ for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors;
+ total_sector_nr++) {
+ struct sector_ptr *sector;
+ int stripe = total_sector_nr / rbio->stripe_nsectors;
+ int sectornr = total_sector_nr % rbio->stripe_nsectors;
- submit_read_bios(rbio, &bio_list);
- wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0);
+ sector = rbio_stripe_sector(rbio, stripe, sectornr);
+ ret = rbio_add_io_sector(rbio, &bio_list, sector,
+ stripe, sectornr, REQ_OP_READ);
+ if (ret) {
+ bio_list_put(&bio_list);
+ return ret;
+ }
+ }
/*
* We may or may not have any corrupted sectors (including missing dev
* and csum mismatch), just let recover_sectors() to handle them all.
*/
- ret = recover_sectors(rbio);
- return ret;
-out:
- while ((bio = bio_list_pop(&bio_list)))
- bio_put(bio);
-
- return ret;
+ submit_read_wait_bio_list(rbio, &bio_list);
+ return recover_sectors(rbio);
}
static void raid_wait_write_end_io(struct bio *bio)
@@ -2282,7 +2226,7 @@ static bool need_read_stripe_sectors(struct btrfs_raid_bio *rbio)
return false;
}
-static int rmw_rbio(struct btrfs_raid_bio *rbio)
+static void rmw_rbio(struct btrfs_raid_bio *rbio)
{
struct bio_list bio_list;
int sectornr;
@@ -2294,30 +2238,28 @@ static int rmw_rbio(struct btrfs_raid_bio *rbio)
*/
ret = alloc_rbio_parity_pages(rbio);
if (ret < 0)
- return ret;
+ goto out;
/*
* Either full stripe write, or we have every data sector already
* cached, can go to write path immediately.
*/
- if (rbio_is_full(rbio) || !need_read_stripe_sectors(rbio))
- goto write;
-
- /*
- * Now we're doing sub-stripe write, also need all data stripes to do
- * the full RMW.
- */
- ret = alloc_rbio_data_pages(rbio);
- if (ret < 0)
- return ret;
+ if (!rbio_is_full(rbio) && need_read_stripe_sectors(rbio)) {
+ /*
+ * Now we're doing sub-stripe write, also need all data stripes
+ * to do the full RMW.
+ */
+ ret = alloc_rbio_data_pages(rbio);
+ if (ret < 0)
+ goto out;
- index_rbio_pages(rbio);
+ index_rbio_pages(rbio);
- ret = rmw_read_wait_recover(rbio);
- if (ret < 0)
- return ret;
+ ret = rmw_read_wait_recover(rbio);
+ if (ret < 0)
+ goto out;
+ }
-write:
/*
* At this stage we're not allowed to add any new bios to the
* bio list any more, anyone else that wants to change this stripe
@@ -2348,7 +2290,7 @@ write:
bio_list_init(&bio_list);
ret = rmw_assemble_write_bios(rbio, &bio_list);
if (ret < 0)
- return ret;
+ goto out;
/* We should have at least one bio assembled. */
ASSERT(bio_list_size(&bio_list));
@@ -2365,32 +2307,22 @@ write:
break;
}
}
- return ret;
+out:
+ rbio_orig_end_io(rbio, errno_to_blk_status(ret));
}
static void rmw_rbio_work(struct work_struct *work)
{
struct btrfs_raid_bio *rbio;
- int ret;
rbio = container_of(work, struct btrfs_raid_bio, work);
-
- ret = lock_stripe_add(rbio);
- if (ret == 0) {
- ret = rmw_rbio(rbio);
- rbio_orig_end_io(rbio, errno_to_blk_status(ret));
- }
+ if (lock_stripe_add(rbio) == 0)
+ rmw_rbio(rbio);
}
static void rmw_rbio_work_locked(struct work_struct *work)
{
- struct btrfs_raid_bio *rbio;
- int ret;
-
- rbio = container_of(work, struct btrfs_raid_bio, work);
-
- ret = rmw_rbio(rbio);
- rbio_orig_end_io(rbio, errno_to_blk_status(ret));
+ rmw_rbio(container_of(work, struct btrfs_raid_bio, work));
}
/*
@@ -2498,7 +2430,6 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check)
struct sector_ptr p_sector = { 0 };
struct sector_ptr q_sector = { 0 };
struct bio_list bio_list;
- struct bio *bio;
int is_replace = 0;
int ret;
@@ -2629,8 +2560,7 @@ submit_write:
return 0;
cleanup:
- while ((bio = bio_list_pop(&bio_list)))
- bio_put(bio);
+ bio_list_put(&bio_list);
return ret;
}
@@ -2646,7 +2576,7 @@ static int recover_scrub_rbio(struct btrfs_raid_bio *rbio)
void **pointers = NULL;
void **unmap_array = NULL;
int sector_nr;
- int ret;
+ int ret = 0;
/*
* @pointers array stores the pointer for each sector.
@@ -2725,15 +2655,12 @@ out:
return ret;
}
-static int scrub_assemble_read_bios(struct btrfs_raid_bio *rbio,
- struct bio_list *bio_list)
+static int scrub_assemble_read_bios(struct btrfs_raid_bio *rbio)
{
- struct bio *bio;
+ struct bio_list bio_list = BIO_EMPTY_LIST;
int total_sector_nr;
int ret = 0;
- ASSERT(bio_list_size(bio_list) == 0);
-
/* Build a list of bios to read all the missing parts. */
for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors;
total_sector_nr++) {
@@ -2762,45 +2689,38 @@ static int scrub_assemble_read_bios(struct btrfs_raid_bio *rbio,
if (sector->uptodate)
continue;
- ret = rbio_add_io_sector(rbio, bio_list, sector, stripe,
+ ret = rbio_add_io_sector(rbio, &bio_list, sector, stripe,
sectornr, REQ_OP_READ);
- if (ret)
- goto error;
+ if (ret) {
+ bio_list_put(&bio_list);
+ return ret;
+ }
}
+
+ submit_read_wait_bio_list(rbio, &bio_list);
return 0;
-error:
- while ((bio = bio_list_pop(bio_list)))
- bio_put(bio);
- return ret;
}
-static int scrub_rbio(struct btrfs_raid_bio *rbio)
+static void scrub_rbio(struct btrfs_raid_bio *rbio)
{
bool need_check = false;
- struct bio_list bio_list;
int sector_nr;
int ret;
- struct bio *bio;
-
- bio_list_init(&bio_list);
ret = alloc_rbio_essential_pages(rbio);
if (ret)
- goto cleanup;
+ goto out;
bitmap_clear(rbio->error_bitmap, 0, rbio->nr_sectors);
- ret = scrub_assemble_read_bios(rbio, &bio_list);
+ ret = scrub_assemble_read_bios(rbio);
if (ret < 0)
- goto cleanup;
-
- submit_read_bios(rbio, &bio_list);
- wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0);
+ goto out;
/* We may have some failures, recover the failed sectors first. */
ret = recover_scrub_rbio(rbio);
if (ret < 0)
- goto cleanup;
+ goto out;
/*
* We have every sector properly prepared. Can finish the scrub
@@ -2817,23 +2737,13 @@ static int scrub_rbio(struct btrfs_raid_bio *rbio)
break;
}
}
- return ret;
-
-cleanup:
- while ((bio = bio_list_pop(&bio_list)))
- bio_put(bio);
-
- return ret;
+out:
+ rbio_orig_end_io(rbio, errno_to_blk_status(ret));
}
static void scrub_rbio_work_locked(struct work_struct *work)
{
- struct btrfs_raid_bio *rbio;
- int ret;
-
- rbio = container_of(work, struct btrfs_raid_bio, work);
- ret = scrub_rbio(rbio);
- rbio_orig_end_io(rbio, errno_to_blk_status(ret));
+ scrub_rbio(container_of(work, struct btrfs_raid_bio, work));
}
void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h
index 7c73a443939e..df0e0abdeb1f 100644
--- a/fs/btrfs/raid56.h
+++ b/fs/btrfs/raid56.h
@@ -65,7 +65,7 @@ struct btrfs_raid_bio {
/* Number of data stripes (no p/q) */
u8 nr_data;
- /* Numer of all stripes (including P/Q) */
+ /* Number of all stripes (including P/Q) */
u8 real_stripes;
/* How many pages there are for each stripe */
@@ -132,7 +132,7 @@ struct btrfs_raid_bio {
/*
* Checksum buffer if the rbio is for data. The buffer should cover
- * all data sectors (exlcuding P/Q sectors).
+ * all data sectors (excluding P/Q sectors).
*/
u8 *csum_buf;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 31ec4a7658ce..ef13a9d4e370 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2825,7 +2825,7 @@ static noinline_for_stack int prealloc_file_extent_cluster(
*
* Here we have to manually invalidate the range (i_size, PAGE_END + 1).
*/
- if (!IS_ALIGNED(i_size, PAGE_SIZE)) {
+ if (!PAGE_ALIGNED(i_size)) {
struct address_space *mapping = inode->vfs_inode.i_mapping;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
const u32 sectorsize = fs_info->sectorsize;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 52b346795f66..69c93ae333f6 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -229,7 +229,7 @@ struct full_stripe_lock {
};
#ifndef CONFIG_64BIT
-/* This structure is for archtectures whose (void *) is smaller than u64 */
+/* This structure is for architectures whose (void *) is smaller than u64 */
struct scrub_page_private {
u64 logical;
};
@@ -2053,20 +2053,33 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
* a) don't have an extent buffer and
* b) the page is already kmapped
*/
- if (sblock->logical != btrfs_stack_header_bytenr(h))
+ if (sblock->logical != btrfs_stack_header_bytenr(h)) {
sblock->header_error = 1;
-
- if (sector->generation != btrfs_stack_header_generation(h)) {
- sblock->header_error = 1;
- sblock->generation_error = 1;
+ btrfs_warn_rl(fs_info,
+ "tree block %llu mirror %u has bad bytenr, has %llu want %llu",
+ sblock->logical, sblock->mirror_num,
+ btrfs_stack_header_bytenr(h),
+ sblock->logical);
+ goto out;
}
- if (!scrub_check_fsid(h->fsid, sector))
+ if (!scrub_check_fsid(h->fsid, sector)) {
sblock->header_error = 1;
+ btrfs_warn_rl(fs_info,
+ "tree block %llu mirror %u has bad fsid, has %pU want %pU",
+ sblock->logical, sblock->mirror_num,
+ h->fsid, sblock->dev->fs_devices->fsid);
+ goto out;
+ }
- if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
- BTRFS_UUID_SIZE))
+ if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, BTRFS_UUID_SIZE)) {
sblock->header_error = 1;
+ btrfs_warn_rl(fs_info,
+ "tree block %llu mirror %u has bad chunk tree uuid, has %pU want %pU",
+ sblock->logical, sblock->mirror_num,
+ h->chunk_tree_uuid, fs_info->chunk_tree_uuid);
+ goto out;
+ }
shash->tfm = fs_info->csum_shash;
crypto_shash_init(shash);
@@ -2079,9 +2092,27 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
}
crypto_shash_final(shash, calculated_csum);
- if (memcmp(calculated_csum, on_disk_csum, sctx->fs_info->csum_size))
+ if (memcmp(calculated_csum, on_disk_csum, sctx->fs_info->csum_size)) {
sblock->checksum_error = 1;
+ btrfs_warn_rl(fs_info,
+ "tree block %llu mirror %u has bad csum, has " CSUM_FMT " want " CSUM_FMT,
+ sblock->logical, sblock->mirror_num,
+ CSUM_FMT_VALUE(fs_info->csum_size, on_disk_csum),
+ CSUM_FMT_VALUE(fs_info->csum_size, calculated_csum));
+ goto out;
+ }
+
+ if (sector->generation != btrfs_stack_header_generation(h)) {
+ sblock->header_error = 1;
+ sblock->generation_error = 1;
+ btrfs_warn_rl(fs_info,
+ "tree block %llu mirror %u has bad generation, has %llu want %llu",
+ sblock->logical, sblock->mirror_num,
+ btrfs_stack_header_generation(h),
+ sector->generation);
+ }
+out:
return sblock->header_error || sblock->checksum_error;
}
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index e65e6b6600a7..e5c963bb873d 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -32,6 +32,7 @@
#include "file-item.h"
#include "ioctl.h"
#include "verity.h"
+#include "lru_cache.h"
/*
* Maximum number of references an extent can have in order for us to attempt to
@@ -80,23 +81,23 @@ struct clone_root {
bool found_ref;
};
-#define SEND_CTX_MAX_NAME_CACHE_SIZE 128
-#define SEND_CTX_NAME_CACHE_CLEAN_SIZE (SEND_CTX_MAX_NAME_CACHE_SIZE * 2)
+#define SEND_MAX_NAME_CACHE_SIZE 256
/*
- * Limit the root_ids array of struct backref_cache_entry to 12 elements.
- * This makes the size of a cache entry to be exactly 128 bytes on x86_64.
+ * Limit the root_ids array of struct backref_cache_entry to 17 elements.
+ * This makes the size of a cache entry to be exactly 192 bytes on x86_64, which
+ * can be satisfied from the kmalloc-192 slab, without wasting any space.
* The most common case is to have a single root for cloning, which corresponds
- * to the send root. Having the user specify more than 11 clone roots is not
+ * to the send root. Having the user specify more than 16 clone roots is not
* common, and in such rare cases we simply don't use caching if the number of
- * cloning roots that lead down to a leaf is more than 12.
+ * cloning roots that lead down to a leaf is more than 17.
*/
-#define SEND_MAX_BACKREF_CACHE_ROOTS 12
+#define SEND_MAX_BACKREF_CACHE_ROOTS 17
/*
* Max number of entries in the cache.
- * With SEND_MAX_BACKREF_CACHE_ROOTS as 12, the size in bytes, excluding
- * maple tree's internal nodes, is 16K.
+ * With SEND_MAX_BACKREF_CACHE_ROOTS as 17, the size in bytes, excluding
+ * maple tree's internal nodes, is 24K.
*/
#define SEND_MAX_BACKREF_CACHE_SIZE 128
@@ -107,15 +108,31 @@ struct clone_root {
* x86_64).
*/
struct backref_cache_entry {
- /* List to link to the cache's lru list. */
- struct list_head list;
- /* The key for this entry in the cache. */
- u64 key;
+ struct btrfs_lru_cache_entry entry;
u64 root_ids[SEND_MAX_BACKREF_CACHE_ROOTS];
/* Number of valid elements in the root_ids array. */
int num_roots;
};
+/* See the comment at lru_cache.h about struct btrfs_lru_cache_entry. */
+static_assert(offsetof(struct backref_cache_entry, entry) == 0);
+
+/*
+ * Max number of entries in the cache that stores directories that were already
+ * created. The cache uses raw struct btrfs_lru_cache_entry entries, so it uses
+ * at most 4096 bytes - sizeof(struct btrfs_lru_cache_entry) is 48 bytes, but
+ * the kmalloc-64 slab is used, so we get 4096 bytes (64 bytes * 64).
+ */
+#define SEND_MAX_DIR_CREATED_CACHE_SIZE 64
+
+/*
+ * Max number of entries in the cache that stores directories that were already
+ * created. The cache uses raw struct btrfs_lru_cache_entry entries, so it uses
+ * at most 4096 bytes - sizeof(struct btrfs_lru_cache_entry) is 48 bytes, but
+ * the kmalloc-64 slab is used, so we get 4096 bytes (64 bytes * 64).
+ */
+#define SEND_MAX_DIR_UTIMES_CACHE_SIZE 64
+
struct send_ctx {
struct file *send_filp;
loff_t send_off;
@@ -174,9 +191,7 @@ struct send_ctx {
struct list_head new_refs;
struct list_head deleted_refs;
- struct radix_tree_root name_cache;
- struct list_head name_cache_list;
- int name_cache_size;
+ struct btrfs_lru_cache name_cache;
/*
* The inode we are currently processing. It's not NULL only when we
@@ -285,13 +300,11 @@ struct send_ctx {
struct rb_root rbtree_new_refs;
struct rb_root rbtree_deleted_refs;
- struct {
- u64 last_reloc_trans;
- struct list_head lru_list;
- struct maple_tree entries;
- /* Number of entries stored in the cache. */
- int size;
- } backref_cache;
+ struct btrfs_lru_cache backref_cache;
+ u64 backref_cache_last_reloc_trans;
+
+ struct btrfs_lru_cache dir_created_cache;
+ struct btrfs_lru_cache dir_utimes_cache;
};
struct pending_dir_move {
@@ -321,21 +334,15 @@ struct orphan_dir_info {
u64 ino;
u64 gen;
u64 last_dir_index_offset;
+ u64 dir_high_seq_ino;
};
struct name_cache_entry {
- struct list_head list;
/*
- * radix_tree has only 32bit entries but we need to handle 64bit inums.
- * We use the lower 32bit of the 64bit inum to store it in the tree. If
- * more then one inum would fall into the same entry, we use radix_list
- * to store the additional entries. radix_list is also used to store
- * entries where two entries have the same inum but different
- * generations.
+ * The key in the entry is an inode number, and the generation matches
+ * the inode's generation.
*/
- struct list_head radix_list;
- u64 ino;
- u64 gen;
+ struct btrfs_lru_cache_entry entry;
u64 parent_ino;
u64 parent_gen;
int ret;
@@ -344,6 +351,9 @@ struct name_cache_entry {
char name[];
};
+/* See the comment at lru_cache.h about struct btrfs_lru_cache_entry. */
+static_assert(offsetof(struct name_cache_entry, entry) == 0);
+
#define ADVANCE 1
#define ADVANCE_ONLY_NEXT -1
@@ -956,14 +966,12 @@ out:
static int get_inode_gen(struct btrfs_root *root, u64 ino, u64 *gen)
{
int ret;
- struct btrfs_inode_info info;
+ struct btrfs_inode_info info = { 0 };
- if (!gen)
- return -EPERM;
+ ASSERT(gen);
ret = get_inode_info(root, ino, &info);
- if (!ret)
- *gen = info.gen;
+ *gen = info.gen;
return ret;
}
@@ -1388,19 +1396,6 @@ static int iterate_backrefs(u64 ino, u64 offset, u64 num_bytes, u64 root_id,
return 0;
}
-static void empty_backref_cache(struct send_ctx *sctx)
-{
- struct backref_cache_entry *entry;
- struct backref_cache_entry *tmp;
-
- list_for_each_entry_safe(entry, tmp, &sctx->backref_cache.lru_list, list)
- kfree(entry);
-
- INIT_LIST_HEAD(&sctx->backref_cache.lru_list);
- mtree_destroy(&sctx->backref_cache.entries);
- sctx->backref_cache.size = 0;
-}
-
static bool lookup_backref_cache(u64 leaf_bytenr, void *ctx,
const u64 **root_ids_ret, int *root_count_ret)
{
@@ -1408,9 +1403,10 @@ static bool lookup_backref_cache(u64 leaf_bytenr, void *ctx,
struct send_ctx *sctx = bctx->sctx;
struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
const u64 key = leaf_bytenr >> fs_info->sectorsize_bits;
+ struct btrfs_lru_cache_entry *raw_entry;
struct backref_cache_entry *entry;
- if (sctx->backref_cache.size == 0)
+ if (btrfs_lru_cache_size(&sctx->backref_cache) == 0)
return false;
/*
@@ -1424,18 +1420,18 @@ static bool lookup_backref_cache(u64 leaf_bytenr, void *ctx,
* transaction handle or holding fs_info->commit_root_sem, so no need
* to take any lock here.
*/
- if (fs_info->last_reloc_trans > sctx->backref_cache.last_reloc_trans) {
- empty_backref_cache(sctx);
+ if (fs_info->last_reloc_trans > sctx->backref_cache_last_reloc_trans) {
+ btrfs_lru_cache_clear(&sctx->backref_cache);
return false;
}
- entry = mtree_load(&sctx->backref_cache.entries, key);
- if (!entry)
+ raw_entry = btrfs_lru_cache_lookup(&sctx->backref_cache, key, 0);
+ if (!raw_entry)
return false;
+ entry = container_of(raw_entry, struct backref_cache_entry, entry);
*root_ids_ret = entry->root_ids;
*root_count_ret = entry->num_roots;
- list_move_tail(&entry->list, &sctx->backref_cache.lru_list);
return true;
}
@@ -1461,7 +1457,8 @@ static void store_backref_cache(u64 leaf_bytenr, const struct ulist *root_ids,
if (!new_entry)
return;
- new_entry->key = leaf_bytenr >> fs_info->sectorsize_bits;
+ new_entry->entry.key = leaf_bytenr >> fs_info->sectorsize_bits;
+ new_entry->entry.gen = 0;
new_entry->num_roots = 0;
ULIST_ITER_INIT(&uiter);
while ((node = ulist_next(root_ids, &uiter)) != NULL) {
@@ -1489,23 +1486,12 @@ static void store_backref_cache(u64 leaf_bytenr, const struct ulist *root_ids,
* none of the roots is part of the list of roots from which we are
* allowed to clone. Cache the new entry as it's still useful to avoid
* backref walking to determine which roots have a path to the leaf.
+ *
+ * Also use GFP_NOFS because we're called while holding a transaction
+ * handle or while holding fs_info->commit_root_sem.
*/
-
- if (sctx->backref_cache.size >= SEND_MAX_BACKREF_CACHE_SIZE) {
- struct backref_cache_entry *lru_entry;
- struct backref_cache_entry *mt_entry;
-
- lru_entry = list_first_entry(&sctx->backref_cache.lru_list,
- struct backref_cache_entry, list);
- mt_entry = mtree_erase(&sctx->backref_cache.entries, lru_entry->key);
- ASSERT(mt_entry == lru_entry);
- list_del(&mt_entry->list);
- kfree(mt_entry);
- sctx->backref_cache.size--;
- }
-
- ret = mtree_insert(&sctx->backref_cache.entries, new_entry->key,
- new_entry, GFP_NOFS);
+ ret = btrfs_lru_cache_store(&sctx->backref_cache, &new_entry->entry,
+ GFP_NOFS);
ASSERT(ret == 0 || ret == -ENOMEM);
if (ret) {
/* Caching is optional, no worries. */
@@ -1513,17 +1499,13 @@ static void store_backref_cache(u64 leaf_bytenr, const struct ulist *root_ids,
return;
}
- list_add_tail(&new_entry->list, &sctx->backref_cache.lru_list);
-
/*
* We are called from iterate_extent_inodes() while either holding a
* transaction handle or holding fs_info->commit_root_sem, so no need
* to take any lock here.
*/
- if (sctx->backref_cache.size == 0)
- sctx->backref_cache.last_reloc_trans = fs_info->last_reloc_trans;
-
- sctx->backref_cache.size++;
+ if (btrfs_lru_cache_size(&sctx->backref_cache) == 1)
+ sctx->backref_cache_last_reloc_trans = fs_info->last_reloc_trans;
}
static int check_extent_item(u64 bytenr, const struct btrfs_extent_item *ei,
@@ -1886,7 +1868,8 @@ enum inode_state {
inode_state_did_delete,
};
-static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen)
+static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen,
+ u64 *send_gen, u64 *parent_gen)
{
int ret;
int left_ret;
@@ -1900,6 +1883,8 @@ static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen)
goto out;
left_ret = (info.nlink == 0) ? -ENOENT : ret;
left_gen = info.gen;
+ if (send_gen)
+ *send_gen = ((left_ret == -ENOENT) ? 0 : info.gen);
if (!sctx->parent_root) {
right_ret = -ENOENT;
@@ -1909,6 +1894,8 @@ static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen)
goto out;
right_ret = (info.nlink == 0) ? -ENOENT : ret;
right_gen = info.gen;
+ if (parent_gen)
+ *parent_gen = ((right_ret == -ENOENT) ? 0 : info.gen);
}
if (!left_ret && !right_ret) {
@@ -1953,14 +1940,15 @@ out:
return ret;
}
-static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen)
+static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen,
+ u64 *send_gen, u64 *parent_gen)
{
int ret;
if (ino == BTRFS_FIRST_FREE_OBJECTID)
return 1;
- ret = get_cur_inode_state(sctx, ino, gen);
+ ret = get_cur_inode_state(sctx, ino, gen, send_gen, parent_gen);
if (ret < 0)
goto out;
@@ -2121,43 +2109,36 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
const char *name, int name_len,
u64 *who_ino, u64 *who_gen, u64 *who_mode)
{
- int ret = 0;
- u64 gen;
+ int ret;
+ u64 parent_root_dir_gen;
u64 other_inode = 0;
struct btrfs_inode_info info;
if (!sctx->parent_root)
- goto out;
+ return 0;
- ret = is_inode_existent(sctx, dir, dir_gen);
+ ret = is_inode_existent(sctx, dir, dir_gen, NULL, &parent_root_dir_gen);
if (ret <= 0)
- goto out;
+ return 0;
/*
* If we have a parent root we need to verify that the parent dir was
* not deleted and then re-created, if it was then we have no overwrite
* and we can just unlink this entry.
+ *
+ * @parent_root_dir_gen was set to 0 if the inode does not exist in the
+ * parent root.
*/
- if (sctx->parent_root && dir != BTRFS_FIRST_FREE_OBJECTID) {
- ret = get_inode_gen(sctx->parent_root, dir, &gen);
- if (ret < 0 && ret != -ENOENT)
- goto out;
- if (ret) {
- ret = 0;
- goto out;
- }
- if (gen != dir_gen)
- goto out;
- }
+ if (sctx->parent_root && dir != BTRFS_FIRST_FREE_OBJECTID &&
+ parent_root_dir_gen != dir_gen)
+ return 0;
ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len,
&other_inode);
- if (ret < 0 && ret != -ENOENT)
- goto out;
- if (ret) {
- ret = 0;
- goto out;
- }
+ if (ret == -ENOENT)
+ return 0;
+ else if (ret < 0)
+ return ret;
/*
* Check if the overwritten ref was already processed. If yes, the ref
@@ -2168,18 +2149,15 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
is_waiting_for_move(sctx, other_inode)) {
ret = get_inode_info(sctx->parent_root, other_inode, &info);
if (ret < 0)
- goto out;
+ return ret;
- ret = 1;
*who_ino = other_inode;
*who_gen = info.gen;
*who_mode = info.mode;
- } else {
- ret = 0;
+ return 1;
}
-out:
- return ret;
+ return 0;
}
/*
@@ -2194,47 +2172,43 @@ static int did_overwrite_ref(struct send_ctx *sctx,
u64 ino, u64 ino_gen,
const char *name, int name_len)
{
- int ret = 0;
- u64 gen;
+ int ret;
u64 ow_inode;
+ u64 ow_gen = 0;
+ u64 send_root_dir_gen;
if (!sctx->parent_root)
- goto out;
+ return 0;
- ret = is_inode_existent(sctx, dir, dir_gen);
+ ret = is_inode_existent(sctx, dir, dir_gen, &send_root_dir_gen, NULL);
if (ret <= 0)
- goto out;
+ return ret;
- if (dir != BTRFS_FIRST_FREE_OBJECTID) {
- ret = get_inode_gen(sctx->send_root, dir, &gen);
- if (ret < 0 && ret != -ENOENT)
- goto out;
- if (ret) {
- ret = 0;
- goto out;
- }
- if (gen != dir_gen)
- goto out;
- }
+ /*
+ * @send_root_dir_gen was set to 0 if the inode does not exist in the
+ * send root.
+ */
+ if (dir != BTRFS_FIRST_FREE_OBJECTID && send_root_dir_gen != dir_gen)
+ return 0;
/* check if the ref was overwritten by another ref */
ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len,
&ow_inode);
- if (ret < 0 && ret != -ENOENT)
- goto out;
- if (ret) {
+ if (ret == -ENOENT) {
/* was never and will never be overwritten */
- ret = 0;
- goto out;
+ return 0;
+ } else if (ret < 0) {
+ return ret;
}
- ret = get_inode_gen(sctx->send_root, ow_inode, &gen);
- if (ret < 0)
- goto out;
+ if (ow_inode == ino) {
+ ret = get_inode_gen(sctx->send_root, ow_inode, &ow_gen);
+ if (ret < 0)
+ return ret;
- if (ow_inode == ino && gen == ino_gen) {
- ret = 0;
- goto out;
+ /* It's the same inode, so no overwrite happened. */
+ if (ow_gen == ino_gen)
+ return 0;
}
/*
@@ -2243,15 +2217,20 @@ static int did_overwrite_ref(struct send_ctx *sctx,
* inode 'ino' to be orphanized, therefore check if ow_inode matches
* the current inode being processed.
*/
- if ((ow_inode < sctx->send_progress) ||
- (ino != sctx->cur_ino && ow_inode == sctx->cur_ino &&
- gen == sctx->cur_inode_gen))
- ret = 1;
- else
- ret = 0;
+ if (ow_inode < sctx->send_progress)
+ return 1;
-out:
- return ret;
+ if (ino != sctx->cur_ino && ow_inode == sctx->cur_ino) {
+ if (ow_gen == 0) {
+ ret = get_inode_gen(sctx->send_root, ow_inode, &ow_gen);
+ if (ret < 0)
+ return ret;
+ }
+ if (ow_gen == sctx->cur_inode_gen)
+ return 1;
+ }
+
+ return 0;
}
/*
@@ -2285,113 +2264,16 @@ out:
return ret;
}
-/*
- * Insert a name cache entry. On 32bit kernels the radix tree index is 32bit,
- * so we need to do some special handling in case we have clashes. This function
- * takes care of this with the help of name_cache_entry::radix_list.
- * In case of error, nce is kfreed.
- */
-static int name_cache_insert(struct send_ctx *sctx,
- struct name_cache_entry *nce)
+static inline struct name_cache_entry *name_cache_search(struct send_ctx *sctx,
+ u64 ino, u64 gen)
{
- int ret = 0;
- struct list_head *nce_head;
-
- nce_head = radix_tree_lookup(&sctx->name_cache,
- (unsigned long)nce->ino);
- if (!nce_head) {
- nce_head = kmalloc(sizeof(*nce_head), GFP_KERNEL);
- if (!nce_head) {
- kfree(nce);
- return -ENOMEM;
- }
- INIT_LIST_HEAD(nce_head);
-
- ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head);
- if (ret < 0) {
- kfree(nce_head);
- kfree(nce);
- return ret;
- }
- }
- list_add_tail(&nce->radix_list, nce_head);
- list_add_tail(&nce->list, &sctx->name_cache_list);
- sctx->name_cache_size++;
-
- return ret;
-}
+ struct btrfs_lru_cache_entry *entry;
-static void name_cache_delete(struct send_ctx *sctx,
- struct name_cache_entry *nce)
-{
- struct list_head *nce_head;
-
- nce_head = radix_tree_lookup(&sctx->name_cache,
- (unsigned long)nce->ino);
- if (!nce_head) {
- btrfs_err(sctx->send_root->fs_info,
- "name_cache_delete lookup failed ino %llu cache size %d, leaking memory",
- nce->ino, sctx->name_cache_size);
- }
-
- list_del(&nce->radix_list);
- list_del(&nce->list);
- sctx->name_cache_size--;
-
- /*
- * We may not get to the final release of nce_head if the lookup fails
- */
- if (nce_head && list_empty(nce_head)) {
- radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino);
- kfree(nce_head);
- }
-}
-
-static struct name_cache_entry *name_cache_search(struct send_ctx *sctx,
- u64 ino, u64 gen)
-{
- struct list_head *nce_head;
- struct name_cache_entry *cur;
-
- nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned long)ino);
- if (!nce_head)
+ entry = btrfs_lru_cache_lookup(&sctx->name_cache, ino, gen);
+ if (!entry)
return NULL;
- list_for_each_entry(cur, nce_head, radix_list) {
- if (cur->ino == ino && cur->gen == gen)
- return cur;
- }
- return NULL;
-}
-
-/*
- * Remove some entries from the beginning of name_cache_list.
- */
-static void name_cache_clean_unused(struct send_ctx *sctx)
-{
- struct name_cache_entry *nce;
-
- if (sctx->name_cache_size < SEND_CTX_NAME_CACHE_CLEAN_SIZE)
- return;
-
- while (sctx->name_cache_size > SEND_CTX_MAX_NAME_CACHE_SIZE) {
- nce = list_entry(sctx->name_cache_list.next,
- struct name_cache_entry, list);
- name_cache_delete(sctx, nce);
- kfree(nce);
- }
-}
-
-static void name_cache_free(struct send_ctx *sctx)
-{
- struct name_cache_entry *nce;
-
- while (!list_empty(&sctx->name_cache_list)) {
- nce = list_entry(sctx->name_cache_list.next,
- struct name_cache_entry, list);
- name_cache_delete(sctx, nce);
- kfree(nce);
- }
+ return container_of(entry, struct name_cache_entry, entry);
}
/*
@@ -2410,7 +2292,7 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx,
{
int ret;
int nce_ret;
- struct name_cache_entry *nce = NULL;
+ struct name_cache_entry *nce;
/*
* First check if we already did a call to this function with the same
@@ -2420,17 +2302,9 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx,
nce = name_cache_search(sctx, ino, gen);
if (nce) {
if (ino < sctx->send_progress && nce->need_later_update) {
- name_cache_delete(sctx, nce);
- kfree(nce);
+ btrfs_lru_cache_remove(&sctx->name_cache, &nce->entry);
nce = NULL;
} else {
- /*
- * Removes the entry from the list and adds it back to
- * the end. This marks the entry as recently used so
- * that name_cache_clean_unused does not remove it.
- */
- list_move_tail(&nce->list, &sctx->name_cache_list);
-
*parent_ino = nce->parent_ino;
*parent_gen = nce->parent_gen;
ret = fs_path_add(dest, nce->name, nce->name_len);
@@ -2446,7 +2320,7 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx,
* This should only happen for the parent dir that we determine in
* record_new_ref_if_needed().
*/
- ret = is_inode_existent(sctx, ino, gen);
+ ret = is_inode_existent(sctx, ino, gen, NULL, NULL);
if (ret < 0)
goto out;
@@ -2497,8 +2371,8 @@ out_cache:
goto out;
}
- nce->ino = ino;
- nce->gen = gen;
+ nce->entry.key = ino;
+ nce->entry.gen = gen;
nce->parent_ino = *parent_ino;
nce->parent_gen = *parent_gen;
nce->name_len = fs_path_len(dest);
@@ -2510,10 +2384,11 @@ out_cache:
else
nce->need_later_update = 1;
- nce_ret = name_cache_insert(sctx, nce);
- if (nce_ret < 0)
+ nce_ret = btrfs_lru_cache_store(&sctx->name_cache, &nce->entry, GFP_KERNEL);
+ if (nce_ret < 0) {
+ kfree(nce);
ret = nce_ret;
- name_cache_clean_unused(sctx);
+ }
out:
return ret;
@@ -2884,6 +2759,63 @@ out:
}
/*
+ * If the cache is full, we can't remove entries from it and do a call to
+ * send_utimes() for each respective inode, because we might be finishing
+ * processing an inode that is a directory and it just got renamed, and existing
+ * entries in the cache may refer to inodes that have the directory in their
+ * full path - in which case we would generate outdated paths (pre-rename)
+ * for the inodes that the cache entries point to. Instead of prunning the
+ * cache when inserting, do it after we finish processing each inode at
+ * finish_inode_if_needed().
+ */
+static int cache_dir_utimes(struct send_ctx *sctx, u64 dir, u64 gen)
+{
+ struct btrfs_lru_cache_entry *entry;
+ int ret;
+
+ entry = btrfs_lru_cache_lookup(&sctx->dir_utimes_cache, dir, gen);
+ if (entry != NULL)
+ return 0;
+
+ /* Caching is optional, don't fail if we can't allocate memory. */
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return send_utimes(sctx, dir, gen);
+
+ entry->key = dir;
+ entry->gen = gen;
+
+ ret = btrfs_lru_cache_store(&sctx->dir_utimes_cache, entry, GFP_KERNEL);
+ ASSERT(ret != -EEXIST);
+ if (ret) {
+ kfree(entry);
+ return send_utimes(sctx, dir, gen);
+ }
+
+ return 0;
+}
+
+static int trim_dir_utimes_cache(struct send_ctx *sctx)
+{
+ while (btrfs_lru_cache_size(&sctx->dir_utimes_cache) >
+ SEND_MAX_DIR_UTIMES_CACHE_SIZE) {
+ struct btrfs_lru_cache_entry *lru;
+ int ret;
+
+ lru = btrfs_lru_cache_lru_entry(&sctx->dir_utimes_cache);
+ ASSERT(lru != NULL);
+
+ ret = send_utimes(sctx, lru->key, lru->gen);
+ if (ret)
+ return ret;
+
+ btrfs_lru_cache_remove(&sctx->dir_utimes_cache, lru);
+ }
+
+ return 0;
+}
+
+/*
* Sends a BTRFS_SEND_C_MKXXX or SYMLINK command to user space. We don't have
* a valid path yet because we did not process the refs yet. So, the inode
* is created as orphan.
@@ -2971,6 +2903,23 @@ out:
return ret;
}
+static void cache_dir_created(struct send_ctx *sctx, u64 dir)
+{
+ struct btrfs_lru_cache_entry *entry;
+ int ret;
+
+ /* Caching is optional, ignore any failures. */
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return;
+
+ entry->key = dir;
+ entry->gen = 0;
+ ret = btrfs_lru_cache_store(&sctx->dir_created_cache, entry, GFP_KERNEL);
+ if (ret < 0)
+ kfree(entry);
+}
+
/*
* We need some special handling for inodes that get processed before the parent
* directory got created. See process_recorded_refs for details.
@@ -2986,6 +2935,9 @@ static int did_create_dir(struct send_ctx *sctx, u64 dir)
struct btrfs_key di_key;
struct btrfs_dir_item *di;
+ if (btrfs_lru_cache_lookup(&sctx->dir_created_cache, dir, 0))
+ return 1;
+
path = alloc_path_for_send();
if (!path)
return -ENOMEM;
@@ -3009,6 +2961,7 @@ static int did_create_dir(struct send_ctx *sctx, u64 dir)
if (di_key.type != BTRFS_ROOT_ITEM_KEY &&
di_key.objectid < sctx->send_progress) {
ret = 1;
+ cache_dir_created(sctx, dir);
break;
}
}
@@ -3038,7 +2991,12 @@ static int send_create_inode_if_needed(struct send_ctx *sctx)
return 0;
}
- return send_create_inode(sctx, sctx->cur_ino);
+ ret = send_create_inode(sctx, sctx->cur_ino);
+
+ if (ret == 0 && S_ISDIR(sctx->cur_inode_mode))
+ cache_dir_created(sctx, sctx->cur_ino);
+
+ return ret;
}
struct recorded_ref {
@@ -3166,6 +3124,7 @@ static struct orphan_dir_info *add_orphan_dir_info(struct send_ctx *sctx,
odi->ino = dir_ino;
odi->gen = dir_gen;
odi->last_dir_index_offset = 0;
+ odi->dir_high_seq_ino = 0;
rb_link_node(&odi->node, parent, p);
rb_insert_color(&odi->node, &sctx->orphan_dirs);
@@ -3215,8 +3174,7 @@ static void free_orphan_dir_info(struct send_ctx *sctx,
* We check this by iterating all dir items and checking if the inode behind
* the dir item was already processed.
*/
-static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
- u64 send_progress)
+static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen)
{
int ret = 0;
int iter_ret = 0;
@@ -3227,6 +3185,8 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
struct btrfs_key loc;
struct btrfs_dir_item *di;
struct orphan_dir_info *odi = NULL;
+ u64 dir_high_seq_ino = 0;
+ u64 last_dir_index_offset = 0;
/*
* Don't try to rmdir the top/root subvolume dir.
@@ -3234,17 +3194,62 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
if (dir == BTRFS_FIRST_FREE_OBJECTID)
return 0;
+ odi = get_orphan_dir_info(sctx, dir, dir_gen);
+ if (odi && sctx->cur_ino < odi->dir_high_seq_ino)
+ return 0;
+
path = alloc_path_for_send();
if (!path)
return -ENOMEM;
+ if (!odi) {
+ /*
+ * Find the inode number associated with the last dir index
+ * entry. This is very likely the inode with the highest number
+ * of all inodes that have an entry in the directory. We can
+ * then use it to avoid future calls to can_rmdir(), when
+ * processing inodes with a lower number, from having to search
+ * the parent root b+tree for dir index keys.
+ */
+ key.objectid = dir;
+ key.type = BTRFS_DIR_INDEX_KEY;
+ key.offset = (u64)-1;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0) {
+ goto out;
+ } else if (ret > 0) {
+ /* Can't happen, the root is never empty. */
+ ASSERT(path->slots[0] > 0);
+ if (WARN_ON(path->slots[0] == 0)) {
+ ret = -EUCLEAN;
+ goto out;
+ }
+ path->slots[0]--;
+ }
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ if (key.objectid != dir || key.type != BTRFS_DIR_INDEX_KEY) {
+ /* No index keys, dir can be removed. */
+ ret = 1;
+ goto out;
+ }
+
+ di = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_dir_item);
+ btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc);
+ dir_high_seq_ino = loc.objectid;
+ if (sctx->cur_ino < dir_high_seq_ino) {
+ ret = 0;
+ goto out;
+ }
+
+ btrfs_release_path(path);
+ }
+
key.objectid = dir;
key.type = BTRFS_DIR_INDEX_KEY;
- key.offset = 0;
-
- odi = get_orphan_dir_info(sctx, dir, dir_gen);
- if (odi)
- key.offset = odi->last_dir_index_offset;
+ key.offset = (odi ? odi->last_dir_index_offset : 0);
btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
struct waiting_dir_move *dm;
@@ -3257,29 +3262,18 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
struct btrfs_dir_item);
btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc);
+ dir_high_seq_ino = max(dir_high_seq_ino, loc.objectid);
+ last_dir_index_offset = found_key.offset;
+
dm = get_waiting_dir_move(sctx, loc.objectid);
if (dm) {
- odi = add_orphan_dir_info(sctx, dir, dir_gen);
- if (IS_ERR(odi)) {
- ret = PTR_ERR(odi);
- goto out;
- }
- odi->gen = dir_gen;
- odi->last_dir_index_offset = found_key.offset;
dm->rmdir_ino = dir;
dm->rmdir_gen = dir_gen;
ret = 0;
goto out;
}
- if (loc.objectid > send_progress) {
- odi = add_orphan_dir_info(sctx, dir, dir_gen);
- if (IS_ERR(odi)) {
- ret = PTR_ERR(odi);
- goto out;
- }
- odi->gen = dir_gen;
- odi->last_dir_index_offset = found_key.offset;
+ if (loc.objectid > sctx->cur_ino) {
ret = 0;
goto out;
}
@@ -3294,7 +3288,22 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
out:
btrfs_free_path(path);
- return ret;
+
+ if (ret)
+ return ret;
+
+ if (!odi) {
+ odi = add_orphan_dir_info(sctx, dir, dir_gen);
+ if (IS_ERR(odi))
+ return PTR_ERR(odi);
+
+ odi->gen = dir_gen;
+ }
+
+ odi->last_dir_index_offset = last_dir_index_offset;
+ odi->dir_high_seq_ino = max(odi->dir_high_seq_ino, dir_high_seq_ino);
+
+ return 0;
}
static int is_waiting_for_move(struct send_ctx *sctx, u64 ino)
@@ -3579,7 +3588,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
}
gen = odi->gen;
- ret = can_rmdir(sctx, rmdir_ino, gen, sctx->cur_ino);
+ ret = can_rmdir(sctx, rmdir_ino, gen);
if (ret < 0)
goto out;
if (!ret)
@@ -3599,7 +3608,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
}
finish:
- ret = send_utimes(sctx, pm->ino, pm->gen);
+ ret = cache_dir_utimes(sctx, pm->ino, pm->gen);
if (ret < 0)
goto out;
@@ -3619,7 +3628,7 @@ finish:
if (ret < 0)
goto out;
- ret = send_utimes(sctx, cur->dir, cur->dir_gen);
+ ret = cache_dir_utimes(sctx, cur->dir, cur->dir_gen);
if (ret < 0)
goto out;
}
@@ -4242,7 +4251,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
* "testdir_2".
*/
list_for_each_entry(cur, &sctx->new_refs, list) {
- ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
+ ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen, NULL, NULL);
if (ret < 0)
goto out;
if (ret == inode_state_will_create)
@@ -4288,12 +4297,9 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
* the source path when performing its rename
* operation.
*/
- if (is_waiting_for_move(sctx, ow_inode)) {
- wdm = get_waiting_dir_move(sctx,
- ow_inode);
- ASSERT(wdm);
+ wdm = get_waiting_dir_move(sctx, ow_inode);
+ if (wdm)
wdm->orphanized = true;
- }
/*
* Make sure we clear our orphanized inode's
@@ -4306,10 +4312,9 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
* and get instead the orphan name.
*/
nce = name_cache_search(sctx, ow_inode, ow_gen);
- if (nce) {
- name_cache_delete(sctx, nce);
- kfree(nce);
- }
+ if (nce)
+ btrfs_lru_cache_remove(&sctx->name_cache,
+ &nce->entry);
/*
* ow_inode might currently be an ancestor of
@@ -4358,7 +4363,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
* parent directory out of order. But we need to check if this
* did already happen before due to other refs in the same dir.
*/
- ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
+ ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen, NULL, NULL);
if (ret < 0)
goto out;
if (ret == inode_state_will_create) {
@@ -4388,6 +4393,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
ret = send_create_inode(sctx, cur->dir);
if (ret < 0)
goto out;
+ cache_dir_created(sctx, cur->dir);
}
}
@@ -4470,8 +4476,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
* later, we do this check again and rmdir it then if possible.
* See the use of check_dirs for more details.
*/
- ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_inode_gen,
- sctx->cur_ino);
+ ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_inode_gen);
if (ret < 0)
goto out;
if (ret) {
@@ -4564,20 +4569,18 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
if (cur->dir > sctx->cur_ino)
continue;
- ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
+ ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen, NULL, NULL);
if (ret < 0)
goto out;
if (ret == inode_state_did_create ||
ret == inode_state_no_change) {
- /* TODO delayed utimes */
- ret = send_utimes(sctx, cur->dir, cur->dir_gen);
+ ret = cache_dir_utimes(sctx, cur->dir, cur->dir_gen);
if (ret < 0)
goto out;
} else if (ret == inode_state_did_delete &&
cur->dir != last_dir_ino_rm) {
- ret = can_rmdir(sctx, cur->dir, cur->dir_gen,
- sctx->cur_ino);
+ ret = can_rmdir(sctx, cur->dir, cur->dir_gen);
if (ret < 0)
goto out;
if (ret) {
@@ -5635,7 +5638,7 @@ static int send_encoded_extent(struct send_ctx *sctx, struct btrfs_path *path,
* boundary in the send buffer. This means that there may be a gap
* between the beginning of the command and the file data.
*/
- data_offset = ALIGN(sctx->send_size, PAGE_SIZE);
+ data_offset = PAGE_ALIGN(sctx->send_size);
if (data_offset > sctx->send_max_size ||
sctx->send_max_size - data_offset < disk_num_bytes) {
ret = -EOVERFLOW;
@@ -5759,7 +5762,7 @@ static int send_extent_data(struct send_ctx *sctx, struct btrfs_path *path,
sent += size;
}
- if (sctx->clean_page_cache && IS_ALIGNED(end, PAGE_SIZE)) {
+ if (sctx->clean_page_cache && PAGE_ALIGNED(end)) {
/*
* Always operate only on ranges that are a multiple of the page
* size. This is not only to prevent zeroing parts of a page in
@@ -6754,12 +6757,26 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
* it's moved/renamed, therefore we don't need to do it here.
*/
sctx->send_progress = sctx->cur_ino + 1;
- ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
+
+ /*
+ * If the current inode is a non-empty directory, delay issuing
+ * the utimes command for it, as it's very likely we have inodes
+ * with an higher number inside it. We want to issue the utimes
+ * command only after adding all dentries to it.
+ */
+ if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_size > 0)
+ ret = cache_dir_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
+ else
+ ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
+
if (ret < 0)
goto out;
}
out:
+ if (!ret)
+ ret = trim_dir_utimes_cache(sctx);
+
return ret;
}
@@ -8044,6 +8061,8 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
int clone_sources_to_rollback = 0;
size_t alloc_size;
int sort_clone_roots = 0;
+ struct btrfs_lru_cache_entry *entry;
+ struct btrfs_lru_cache_entry *tmp;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -8073,10 +8092,10 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
/*
* Check that we don't overflow at later allocations, we request
* clone_sources_count + 1 items, and compare to unsigned long inside
- * access_ok.
+ * access_ok. Also set an upper limit for allocation size so this can't
+ * easily exhaust memory. Max number of clone sources is about 200K.
*/
- if (arg->clone_sources_count >
- ULONG_MAX / sizeof(struct clone_root) - 1) {
+ if (arg->clone_sources_count > SZ_8M / sizeof(struct clone_root)) {
ret = -EINVAL;
goto out;
}
@@ -8094,11 +8113,22 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
INIT_LIST_HEAD(&sctx->new_refs);
INIT_LIST_HEAD(&sctx->deleted_refs);
- INIT_RADIX_TREE(&sctx->name_cache, GFP_KERNEL);
- INIT_LIST_HEAD(&sctx->name_cache_list);
- INIT_LIST_HEAD(&sctx->backref_cache.lru_list);
- mt_init(&sctx->backref_cache.entries);
+ btrfs_lru_cache_init(&sctx->name_cache, SEND_MAX_NAME_CACHE_SIZE);
+ btrfs_lru_cache_init(&sctx->backref_cache, SEND_MAX_BACKREF_CACHE_SIZE);
+ btrfs_lru_cache_init(&sctx->dir_created_cache,
+ SEND_MAX_DIR_CREATED_CACHE_SIZE);
+ /*
+ * This cache is periodically trimmed to a fixed size elsewhere, see
+ * cache_dir_utimes() and trim_dir_utimes_cache().
+ */
+ btrfs_lru_cache_init(&sctx->dir_utimes_cache, 0);
+
+ sctx->pending_dir_moves = RB_ROOT;
+ sctx->waiting_dir_moves = RB_ROOT;
+ sctx->orphan_dirs = RB_ROOT;
+ sctx->rbtree_new_refs = RB_ROOT;
+ sctx->rbtree_deleted_refs = RB_ROOT;
sctx->flags = arg->flags;
@@ -8165,12 +8195,6 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
goto out;
}
- sctx->pending_dir_moves = RB_ROOT;
- sctx->waiting_dir_moves = RB_ROOT;
- sctx->orphan_dirs = RB_ROOT;
- sctx->rbtree_new_refs = RB_ROOT;
- sctx->rbtree_deleted_refs = RB_ROOT;
-
sctx->clone_roots = kvcalloc(sizeof(*sctx->clone_roots),
arg->clone_sources_count + 1,
GFP_KERNEL);
@@ -8279,6 +8303,13 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
if (ret < 0)
goto out;
+ btrfs_lru_cache_for_each_entry_safe(&sctx->dir_utimes_cache, entry, tmp) {
+ ret = send_utimes(sctx, entry->key, entry->gen);
+ if (ret < 0)
+ goto out;
+ btrfs_lru_cache_remove(&sctx->dir_utimes_cache, entry);
+ }
+
if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_END_CMD)) {
ret = begin_cmd(sctx, BTRFS_SEND_C_END);
if (ret < 0)
@@ -8358,11 +8389,12 @@ out:
kvfree(sctx->send_buf);
kvfree(sctx->verity_descriptor);
- name_cache_free(sctx);
-
close_current_inode(sctx);
- empty_backref_cache(sctx);
+ btrfs_lru_cache_clear(&sctx->name_cache);
+ btrfs_lru_cache_clear(&sctx->backref_cache);
+ btrfs_lru_cache_clear(&sctx->dir_created_cache);
+ btrfs_lru_cache_clear(&sctx->dir_utimes_cache);
kfree(sctx);
}
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index d28ee4e36f3d..69c09508afb5 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -407,7 +407,8 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
return 0;
used = btrfs_space_info_used(space_info, true);
- if (btrfs_is_zoned(fs_info) && (space_info->flags & BTRFS_BLOCK_GROUP_METADATA))
+ if (test_bit(BTRFS_FS_NO_OVERCOMMIT, &fs_info->flags) &&
+ (space_info->flags & BTRFS_BLOCK_GROUP_METADATA))
avail = 0;
else
avail = calc_available_free_space(fs_info, space_info, flush);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 93f52ee85f6f..581845bc206a 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -58,6 +58,7 @@
#include "scrub.h"
#include "verity.h"
#include "super.h"
+#include "extent-tree.h"
#define CREATE_TRACE_POINTS
#include <trace/events/btrfs.h>
@@ -1705,7 +1706,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
if (ret)
goto restore;
- ret = btrfs_check_features(fs_info, sb);
+ ret = btrfs_check_features(fs_info, !(*flags & SB_RDONLY));
if (ret < 0)
goto restore;
@@ -2049,7 +2050,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
}
/*
- * Metadata in mixed block goup profiles are accounted in data
+ * Metadata in mixed block group profiles are accounted in data
*/
if (!mixed && found->flags & BTRFS_BLOCK_GROUP_METADATA) {
if (found->flags & BTRFS_BLOCK_GROUP_DATA)
@@ -2514,6 +2515,7 @@ static __always_inline void btrfs_exit_btrfs_fs(void)
static void __exit exit_btrfs_fs(void)
{
btrfs_exit_btrfs_fs();
+ btrfs_cleanup_fs_uuids();
}
static int __init init_btrfs_fs(void)
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 45615ce36498..8c5efa5813b3 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -702,7 +702,7 @@ static void release_raid_kobj(struct kobject *kobj)
kfree(to_raid_kobj(kobj));
}
-static struct kobj_type btrfs_raid_ktype = {
+static const struct kobj_type btrfs_raid_ktype = {
.sysfs_ops = &kobj_sysfs_ops,
.release = release_raid_kobj,
.default_groups = raid_groups,
@@ -900,7 +900,7 @@ static void space_info_release(struct kobject *kobj)
kfree(sinfo);
}
-static struct kobj_type space_info_ktype = {
+static const struct kobj_type space_info_ktype = {
.sysfs_ops = &kobj_sysfs_ops,
.release = space_info_release,
.default_groups = space_info_groups,
@@ -1259,7 +1259,7 @@ static void btrfs_release_fsid_kobj(struct kobject *kobj)
complete(&fs_devs->kobj_unregister);
}
-static struct kobj_type btrfs_ktype = {
+static const struct kobj_type btrfs_ktype = {
.sysfs_ops = &kobj_sysfs_ops,
.release = btrfs_release_fsid_kobj,
};
@@ -1789,7 +1789,7 @@ static void btrfs_release_devid_kobj(struct kobject *kobj)
complete(&device->kobj_unregister);
}
-static struct kobj_type devid_ktype = {
+static const struct kobj_type devid_ktype = {
.sysfs_ops = &kobj_sysfs_ops,
.default_groups = devid_groups,
.release = btrfs_release_devid_kobj,
@@ -2103,7 +2103,7 @@ static void qgroups_release(struct kobject *kobj)
kfree(kobj);
}
-static struct kobj_type qgroups_ktype = {
+static const struct kobj_type qgroups_ktype = {
.sysfs_ops = &kobj_sysfs_ops,
.default_groups = qgroups_groups,
.release = qgroups_release,
@@ -2173,7 +2173,7 @@ static void qgroup_release(struct kobject *kobj)
memset(&qgroup->kobj, 0, sizeof(*kobj));
}
-static struct kobj_type qgroup_ktype = {
+static const struct kobj_type qgroup_ktype = {
.sysfs_ops = &kobj_sysfs_ops,
.release = qgroup_release,
.default_groups = qgroup_groups,
@@ -2272,36 +2272,23 @@ void btrfs_sysfs_del_one_qgroup(struct btrfs_fs_info *fs_info,
* Change per-fs features in /sys/fs/btrfs/UUID/features to match current
* values in superblock. Call after any changes to incompat/compat_ro flags
*/
-void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
- u64 bit, enum btrfs_feature_set set)
+void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info)
{
- struct btrfs_fs_devices *fs_devs;
struct kobject *fsid_kobj;
- u64 __maybe_unused features;
- int __maybe_unused ret;
+ int ret;
if (!fs_info)
return;
- /*
- * See 14e46e04958df74 and e410e34fad913dd, feature bit updates are not
- * safe when called from some contexts (eg. balance)
- */
- features = get_features(fs_info, set);
- ASSERT(bit & supported_feature_masks[set]);
-
- fs_devs = fs_info->fs_devices;
- fsid_kobj = &fs_devs->fsid_kobj;
-
+ fsid_kobj = &fs_info->fs_devices->fsid_kobj;
if (!fsid_kobj->state_initialized)
return;
- /*
- * FIXME: this is too heavy to update just one value, ideally we'd like
- * to use sysfs_update_group but some refactoring is needed first.
- */
- sysfs_remove_group(fsid_kobj, &btrfs_feature_attr_group);
- ret = sysfs_create_group(fsid_kobj, &btrfs_feature_attr_group);
+ ret = sysfs_update_group(fsid_kobj, &btrfs_feature_attr_group);
+ if (ret < 0)
+ btrfs_warn(fs_info,
+ "failed to update /sys/fs/btrfs/%pU/features: %d",
+ fs_info->fs_devices->fsid, ret);
}
int __init btrfs_init_sysfs(void)
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h
index bacef43f7267..86c7eef12873 100644
--- a/fs/btrfs/sysfs.h
+++ b/fs/btrfs/sysfs.h
@@ -19,8 +19,7 @@ void btrfs_sysfs_remove_device(struct btrfs_device *device);
int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs);
void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs);
void btrfs_sysfs_update_sprout_fsid(struct btrfs_fs_devices *fs_devices);
-void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
- u64 bit, enum btrfs_feature_set set);
+void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info);
void btrfs_kobject_uevent(struct block_device *bdev, enum kobject_action action);
int __init btrfs_init_sysfs(void);
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index 181469fc0bb3..ca09cf9afce8 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -64,7 +64,7 @@ struct inode *btrfs_new_test_inode(void)
BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
BTRFS_I(inode)->location.offset = 0;
- inode_init_owner(&init_user_ns, inode, NULL, S_IFREG);
+ inode_init_owner(&nop_mnt_idmap, inode, NULL, S_IFREG);
return inode;
}
diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c
index c5b3a631bf4f..f2f2e11dac4c 100644
--- a/fs/btrfs/tests/extent-map-tests.c
+++ b/fs/btrfs/tests/extent-map-tests.c
@@ -509,7 +509,7 @@ static int test_rmap_block(struct btrfs_fs_info *fs_info,
goto out_free;
}
- ret = btrfs_rmap_block(fs_info, em->start, NULL, btrfs_sb_offset(1),
+ ret = btrfs_rmap_block(fs_info, em->start, btrfs_sb_offset(1),
&logical, &out_ndaddrs, &out_stripe_len);
if (ret || (out_ndaddrs == 0 && test->expected_mapped_addr)) {
test_err("didn't rmap anything but expected %d",
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index b8c52e89688c..18329ebcb1cb 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -2464,6 +2464,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
wake_up(&fs_info->transaction_wait);
btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_UNBLOCKED);
+ /* If we have features changed, wake up the cleaner to update sysfs. */
+ if (test_bit(BTRFS_FS_FEATURE_CHANGED, &fs_info->flags) &&
+ fs_info->cleaner_kthread)
+ wake_up_process(fs_info->cleaner_kthread);
+
ret = btrfs_write_and_wait_transaction(trans);
if (ret) {
btrfs_handle_fs_error(fs_info, ret,
@@ -2604,6 +2609,35 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_fs_info *fs_info)
return (ret < 0) ? 0 : 1;
}
+/*
+ * We only mark the transaction aborted and then set the file system read-only.
+ * This will prevent new transactions from starting or trying to join this
+ * one.
+ *
+ * This means that error recovery at the call site is limited to freeing
+ * any local memory allocations and passing the error code up without
+ * further cleanup. The transaction should complete as it normally would
+ * in the call path but will return -EIO.
+ *
+ * We'll complete the cleanup in btrfs_end_transaction and
+ * btrfs_commit_transaction.
+ */
+void __cold __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
+ const char *function,
+ unsigned int line, int errno, bool first_hit)
+{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+
+ WRITE_ONCE(trans->aborted, errno);
+ WRITE_ONCE(trans->transaction->aborted, errno);
+ if (first_hit && errno == -ENOSPC)
+ btrfs_dump_space_info_for_trans_abort(fs_info);
+ /* Wake up anybody who may be waiting on this transaction */
+ wake_up(&fs_info->transaction_wait);
+ wake_up(&fs_info->transaction_blocked_wait);
+ __btrfs_handle_fs_error(fs_info, function, line, errno, NULL);
+}
+
int __init btrfs_transaction_init(void)
{
btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle",
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 97f6c39f59c8..fa728ab80826 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -202,6 +202,34 @@ static inline void btrfs_clear_skip_qgroup(struct btrfs_trans_handle *trans)
delayed_refs->qgroup_to_skip = 0;
}
+bool __cold abort_should_print_stack(int errno);
+
+/*
+ * Call btrfs_abort_transaction as early as possible when an error condition is
+ * detected, that way the exact stack trace is reported for some errors.
+ */
+#define btrfs_abort_transaction(trans, errno) \
+do { \
+ bool first = false; \
+ /* Report first abort since mount */ \
+ if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED, \
+ &((trans)->fs_info->fs_state))) { \
+ first = true; \
+ if (WARN(abort_should_print_stack(errno), \
+ KERN_ERR \
+ "BTRFS: Transaction aborted (error %d)\n", \
+ (errno))) { \
+ /* Stack trace printed. */ \
+ } else { \
+ btrfs_debug((trans)->fs_info, \
+ "Transaction aborted (error %d)", \
+ (errno)); \
+ } \
+ } \
+ __btrfs_abort_transaction((trans), __func__, \
+ __LINE__, (errno), first); \
+} while (0)
+
int btrfs_end_transaction(struct btrfs_trans_handle *trans);
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
unsigned int num_items);
@@ -236,6 +264,9 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction);
void btrfs_add_dropped_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans);
+void __cold __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
+ const char *function,
+ unsigned int line, int errno, bool first_hit);
int __init btrfs_transaction_init(void);
void __cold btrfs_transaction_exit(void);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index a3c43f0b1c95..200cea6e49e5 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -279,12 +279,6 @@ void btrfs_end_log_trans(struct btrfs_root *root)
}
}
-static void btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
-{
- filemap_fdatawait_range(buf->pages[0]->mapping,
- buf->start, buf->start + buf->len - 1);
-}
-
/*
* the walk control struct is used to pass state down the chain when
* processing the log tree. The stage field tells us which part
@@ -2623,11 +2617,12 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
return ret;
}
+ btrfs_tree_lock(next);
+ btrfs_clear_buffer_dirty(trans, next);
+ wait_on_extent_buffer_writeback(next);
+ btrfs_tree_unlock(next);
+
if (trans) {
- btrfs_tree_lock(next);
- btrfs_clean_tree_block(next);
- btrfs_wait_tree_block_writeback(next);
- btrfs_tree_unlock(next);
ret = btrfs_pin_reserved_extent(trans,
bytenr, blocksize);
if (ret) {
@@ -2637,8 +2632,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
btrfs_redirty_list_add(
trans->transaction, next);
} else {
- if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
- clear_extent_buffer_dirty(next);
unaccount_log_buffer(fs_info, bytenr);
}
}
@@ -2693,11 +2686,12 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
next = path->nodes[*level];
+ btrfs_tree_lock(next);
+ btrfs_clear_buffer_dirty(trans, next);
+ wait_on_extent_buffer_writeback(next);
+ btrfs_tree_unlock(next);
+
if (trans) {
- btrfs_tree_lock(next);
- btrfs_clean_tree_block(next);
- btrfs_wait_tree_block_writeback(next);
- btrfs_tree_unlock(next);
ret = btrfs_pin_reserved_extent(trans,
path->nodes[*level]->start,
path->nodes[*level]->len);
@@ -2706,9 +2700,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
btrfs_redirty_list_add(trans->transaction,
next);
} else {
- if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
- clear_extent_buffer_dirty(next);
-
unaccount_log_buffer(fs_info,
path->nodes[*level]->start);
}
@@ -2776,19 +2767,18 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
next = path->nodes[orig_level];
+ btrfs_tree_lock(next);
+ btrfs_clear_buffer_dirty(trans, next);
+ wait_on_extent_buffer_writeback(next);
+ btrfs_tree_unlock(next);
+
if (trans) {
- btrfs_tree_lock(next);
- btrfs_clean_tree_block(next);
- btrfs_wait_tree_block_writeback(next);
- btrfs_tree_unlock(next);
ret = btrfs_pin_reserved_extent(trans,
next->start, next->len);
if (ret)
goto out;
btrfs_redirty_list_add(trans->transaction, next);
} else {
- if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
- clear_extent_buffer_dirty(next);
unaccount_log_buffer(fs_info, next->start);
}
}
@@ -2980,7 +2970,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
ret = 0;
if (ret) {
blk_finish_plug(&plug);
- btrfs_abort_transaction(trans, ret);
btrfs_set_log_full_commit(trans);
mutex_unlock(&root->log_mutex);
goto out;
@@ -3045,15 +3034,12 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
blk_finish_plug(&plug);
btrfs_set_log_full_commit(trans);
-
- if (ret != -ENOSPC) {
- btrfs_abort_transaction(trans, ret);
- mutex_unlock(&log_root_tree->log_mutex);
- goto out;
- }
+ if (ret != -ENOSPC)
+ btrfs_err(fs_info,
+ "failed to update log for root %llu ret %d",
+ root->root_key.objectid, ret);
btrfs_wait_tree_log_extents(log, mark);
mutex_unlock(&log_root_tree->log_mutex);
- ret = BTRFS_LOG_FORCE_COMMIT;
goto out;
}
@@ -3112,7 +3098,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
goto out_wake_log_root;
} else if (ret) {
btrfs_set_log_full_commit(trans);
- btrfs_abort_transaction(trans, ret);
mutex_unlock(&log_root_tree->log_mutex);
goto out_wake_log_root;
}
@@ -3581,17 +3566,19 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans,
}
static int flush_dir_items_batch(struct btrfs_trans_handle *trans,
- struct btrfs_root *log,
+ struct btrfs_inode *inode,
struct extent_buffer *src,
struct btrfs_path *dst_path,
int start_slot,
int count)
{
+ struct btrfs_root *log = inode->root->log_root;
char *ins_data = NULL;
struct btrfs_item_batch batch;
struct extent_buffer *dst;
unsigned long src_offset;
unsigned long dst_offset;
+ u64 last_index;
struct btrfs_key key;
u32 item_size;
int ret;
@@ -3649,6 +3636,18 @@ static int flush_dir_items_batch(struct btrfs_trans_handle *trans,
src_offset = btrfs_item_ptr_offset(src, start_slot + count - 1);
copy_extent_buffer(dst, src, dst_offset, src_offset, batch.total_data_size);
btrfs_release_path(dst_path);
+
+ last_index = batch.keys[count - 1].offset;
+ ASSERT(last_index > inode->last_dir_index_offset);
+
+ /*
+ * If for some unexpected reason the last item's index is not greater
+ * than the last index we logged, warn and force a transaction commit.
+ */
+ if (WARN_ON(last_index <= inode->last_dir_index_offset))
+ ret = BTRFS_LOG_FORCE_COMMIT;
+ else
+ inode->last_dir_index_offset = last_index;
out:
kfree(ins_data);
@@ -3698,7 +3697,6 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans,
}
di = btrfs_item_ptr(src, i, struct btrfs_dir_item);
- ctx->last_dir_item_offset = key.offset;
/*
* Skip ranges of items that consist only of dir item keys created
@@ -3761,7 +3759,7 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans,
if (batch_size > 0) {
int ret;
- ret = flush_dir_items_batch(trans, log, src, dst_path,
+ ret = flush_dir_items_batch(trans, inode, src, dst_path,
batch_start, batch_size);
if (ret < 0)
return ret;
@@ -3785,7 +3783,6 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
struct btrfs_key min_key;
struct btrfs_root *root = inode->root;
struct btrfs_root *log = root->log_root;
- int err = 0;
int ret;
u64 last_old_dentry_offset = min_offset - 1;
u64 last_offset = (u64)-1;
@@ -3826,7 +3823,10 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
path->slots[0]);
if (tmp.type == BTRFS_DIR_INDEX_KEY)
last_old_dentry_offset = tmp.offset;
+ } else if (ret > 0) {
+ ret = 0;
}
+
goto done;
}
@@ -3846,20 +3846,37 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
*/
if (tmp.type == BTRFS_DIR_INDEX_KEY)
last_old_dentry_offset = tmp.offset;
+ } else if (ret < 0) {
+ goto done;
}
+
btrfs_release_path(path);
/*
- * Find the first key from this transaction again. See the note for
- * log_new_dir_dentries, if we're logging a directory recursively we
- * won't be holding its i_mutex, which means we can modify the directory
- * while we're logging it. If we remove an entry between our first
- * search and this search we'll not find the key again and can just
- * bail.
+ * Find the first key from this transaction again or the one we were at
+ * in the loop below in case we had to reschedule. We may be logging the
+ * directory without holding its VFS lock, which happen when logging new
+ * dentries (through log_new_dir_dentries()) or in some cases when we
+ * need to log the parent directory of an inode. This means a dir index
+ * key might be deleted from the inode's root, and therefore we may not
+ * find it anymore. If we can't find it, just move to the next key. We
+ * can not bail out and ignore, because if we do that we will simply
+ * not log dir index keys that come after the one that was just deleted
+ * and we can end up logging a dir index range that ends at (u64)-1
+ * (@last_offset is initialized to that), resulting in removing dir
+ * entries we should not remove at log replay time.
*/
search:
ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
- if (ret != 0)
+ if (ret > 0) {
+ ret = btrfs_next_item(root, path);
+ if (ret > 0) {
+ /* There are no more keys in the inode's root. */
+ ret = 0;
+ goto done;
+ }
+ }
+ if (ret < 0)
goto done;
/*
@@ -3870,8 +3887,8 @@ search:
ret = process_dir_items_leaf(trans, inode, path, dst_path, ctx,
&last_old_dentry_offset);
if (ret != 0) {
- if (ret < 0)
- err = ret;
+ if (ret > 0)
+ ret = 0;
goto done;
}
path->slots[0] = btrfs_header_nritems(path->nodes[0]);
@@ -3882,10 +3899,10 @@ search:
*/
ret = btrfs_next_leaf(root, path);
if (ret) {
- if (ret == 1)
+ if (ret == 1) {
last_offset = (u64)-1;
- else
- err = ret;
+ ret = 0;
+ }
goto done;
}
btrfs_item_key_to_cpu(path->nodes[0], &min_key, path->slots[0]);
@@ -3916,7 +3933,7 @@ done:
btrfs_release_path(path);
btrfs_release_path(dst_path);
- if (err == 0) {
+ if (ret == 0) {
*last_offset_ret = last_offset;
/*
* In case the leaf was changed in the current transaction but
@@ -3927,15 +3944,13 @@ done:
* a range, last_old_dentry_offset is == to last_offset.
*/
ASSERT(last_old_dentry_offset <= last_offset);
- if (last_old_dentry_offset < last_offset) {
+ if (last_old_dentry_offset < last_offset)
ret = insert_dir_log_key(trans, log, path, ino,
last_old_dentry_offset + 1,
last_offset);
- if (ret)
- err = ret;
- }
}
- return err;
+
+ return ret;
}
/*
@@ -4031,7 +4046,6 @@ static noinline int log_directory_changes(struct btrfs_trans_handle *trans,
min_key = BTRFS_DIR_START_INDEX;
max_key = 0;
- ctx->last_dir_item_offset = inode->last_dir_index_offset;
while (1) {
ret = log_dir_items(trans, inode, path, dst_path,
@@ -4043,8 +4057,6 @@ static noinline int log_directory_changes(struct btrfs_trans_handle *trans,
min_key = max_key + 1;
}
- inode->last_dir_index_offset = ctx->last_dir_item_offset;
-
return 0;
}
@@ -6440,7 +6452,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
* result in losing the file after a log replay.
*/
if (full_dir_logging && inode->last_unlink_trans >= trans->transid) {
- btrfs_set_log_full_commit(trans);
ret = BTRFS_LOG_FORCE_COMMIT;
goto out_unlock;
}
@@ -7459,8 +7470,11 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
* not fail, but if it does, it's not serious, just bail out and
* mark the log for a full commit.
*/
- if (WARN_ON_ONCE(ret < 0))
+ if (WARN_ON_ONCE(ret < 0)) {
+ fscrypt_free_filename(&fname);
goto out;
+ }
+
log_pinned = true;
path = btrfs_alloc_path();
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index 85b43075ac58..bdeb5216718f 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -13,8 +13,13 @@
/* return value for btrfs_log_dentry_safe that means we don't need to log it at all */
#define BTRFS_NO_LOG_SYNC 256
-/* We can't use the tree log for whatever reason, force a transaction commit */
-#define BTRFS_LOG_FORCE_COMMIT (1)
+/*
+ * We can't use the tree log for whatever reason, force a transaction commit.
+ * We use a negative value because there are functions through the logging code
+ * that need to return an error (< 0 value), false (0) or true (1). Any negative
+ * value will do, as it will cause the log to be marked for a full sync.
+ */
+#define BTRFS_LOG_FORCE_COMMIT (-(MAX_ERRNO + 1))
struct btrfs_log_ctx {
int log_ret;
@@ -24,8 +29,6 @@ struct btrfs_log_ctx {
bool logging_new_delayed_dentries;
/* Indicate if the inode being logged was logged before. */
bool logged_before;
- /* Tracks the last logged dir item/index key offset. */
- u64 last_dir_item_offset;
struct inode *inode;
struct list_head list;
/* Only used for fast fsyncs. */
diff --git a/fs/btrfs/verity.c b/fs/btrfs/verity.c
index bf9eb693a6a7..c5ff16f9e9fa 100644
--- a/fs/btrfs/verity.c
+++ b/fs/btrfs/verity.c
@@ -783,30 +783,25 @@ again:
/*
* fsverity op that writes a Merkle tree block into the btree.
*
- * @inode: inode to write a Merkle tree block for
- * @buf: Merkle tree data block to write
- * @index: index of the block in the Merkle tree
- * @log_blocksize: log base 2 of the Merkle tree block size
- *
- * Note that the block size could be different from the page size, so it is not
- * safe to assume that index is a page index.
+ * @inode: inode to write a Merkle tree block for
+ * @buf: Merkle tree block to write
+ * @pos: the position of the block in the Merkle tree (in bytes)
+ * @size: the Merkle tree block size (in bytes)
*
* Returns 0 on success or negative error code on failure
*/
static int btrfs_write_merkle_tree_block(struct inode *inode, const void *buf,
- u64 index, int log_blocksize)
+ u64 pos, unsigned int size)
{
- u64 off = index << log_blocksize;
- u64 len = 1ULL << log_blocksize;
loff_t merkle_pos = merkle_file_pos(inode);
if (merkle_pos < 0)
return merkle_pos;
- if (merkle_pos > inode->i_sb->s_maxbytes - off - len)
+ if (merkle_pos > inode->i_sb->s_maxbytes - pos - size)
return -EFBIG;
return write_key_bytes(BTRFS_I(inode), BTRFS_VERITY_MERKLE_ITEM_KEY,
- off, buf, len);
+ pos, buf, size);
}
const struct fsverity_operations btrfs_verityops = {
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index aa25fa335d3e..7823168c08a6 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -403,6 +403,7 @@ void btrfs_free_device(struct btrfs_device *device)
static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
{
struct btrfs_device *device;
+
WARN_ON(fs_devices->opened);
while (!list_empty(&fs_devices->devices)) {
device = list_entry(fs_devices->devices.next,
@@ -727,7 +728,7 @@ static struct btrfs_fs_devices *find_fsid_reverted_metadata(
/*
* Handle the case where the scanned device is part of an fs whose last
* metadata UUID change reverted it to the original FSID. At the same
- * time * fs_devices was first created by another constitutent device
+ * time fs_devices was first created by another constituent device
* which didn't fully observe the operation. This results in an
* btrfs_fs_devices created with metadata/fsid different AND
* btrfs_fs_devices::fsid_change set AND the metadata_uuid of the
@@ -768,8 +769,11 @@ static noinline struct btrfs_device *device_list_add(const char *path,
BTRFS_SUPER_FLAG_CHANGING_FSID_V2);
error = lookup_bdev(path, &path_devt);
- if (error)
+ if (error) {
+ btrfs_err(NULL, "failed to lookup block device for path %s: %d",
+ path, error);
return ERR_PTR(error);
+ }
if (fsid_change_in_progress) {
if (!has_metadata_uuid)
@@ -836,6 +840,9 @@ static noinline struct btrfs_device *device_list_add(const char *path,
unsigned int nofs_flag;
if (fs_devices->opened) {
+ btrfs_err(NULL,
+ "device %s belongs to fsid %pU, and the fs is already mounted",
+ path, fs_devices->fsid);
mutex_unlock(&fs_devices->device_list_mutex);
return ERR_PTR(-EBUSY);
}
@@ -905,6 +912,9 @@ static noinline struct btrfs_device *device_list_add(const char *path,
* generation are equal.
*/
mutex_unlock(&fs_devices->device_list_mutex);
+ btrfs_err(NULL,
+"device %s already registered with a higher generation, found %llu expect %llu",
+ path, found_transid, device->generation);
return ERR_PTR(-EEXIST);
}
@@ -1172,9 +1182,22 @@ void btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
mutex_lock(&uuid_mutex);
close_fs_devices(fs_devices);
- if (!fs_devices->opened)
+ if (!fs_devices->opened) {
list_splice_init(&fs_devices->seed_list, &list);
+ /*
+ * If the struct btrfs_fs_devices is not assembled with any
+ * other device, it can be re-initialized during the next mount
+ * without the needing device-scan step. Therefore, it can be
+ * fully freed.
+ */
+ if (fs_devices->num_devices == 1) {
+ list_del(&fs_devices->fs_list);
+ free_fs_devices(fs_devices);
+ }
+ }
+
+
list_for_each_entry_safe(fs_devices, tmp, &list, seed_list) {
close_fs_devices(fs_devices);
list_del(&fs_devices->seed_list);
@@ -1591,7 +1614,7 @@ again:
if (ret < 0)
goto out;
- while (1) {
+ while (search_start < search_end) {
l = path->nodes[0];
slot = path->slots[0];
if (slot >= btrfs_header_nritems(l)) {
@@ -1614,6 +1637,9 @@ again:
if (key.type != BTRFS_DEV_EXTENT_KEY)
goto next;
+ if (key.offset > search_end)
+ break;
+
if (key.offset > search_start) {
hole_size = key.offset - search_start;
dev_extent_hole_check(device, &search_start, &hole_size,
@@ -1674,6 +1700,7 @@ next:
else
ret = 0;
+ ASSERT(max_hole_start + max_hole_size <= search_end);
out:
btrfs_free_path(path);
*start = max_hole_start;
@@ -2005,42 +2032,42 @@ static u64 btrfs_num_devices(struct btrfs_fs_info *fs_info)
return num_devices;
}
+static void btrfs_scratch_superblock(struct btrfs_fs_info *fs_info,
+ struct block_device *bdev, int copy_num)
+{
+ struct btrfs_super_block *disk_super;
+ const size_t len = sizeof(disk_super->magic);
+ const u64 bytenr = btrfs_sb_offset(copy_num);
+ int ret;
+
+ disk_super = btrfs_read_disk_super(bdev, bytenr, bytenr);
+ if (IS_ERR(disk_super))
+ return;
+
+ memset(&disk_super->magic, 0, len);
+ folio_mark_dirty(virt_to_folio(disk_super));
+ btrfs_release_disk_super(disk_super);
+
+ ret = sync_blockdev_range(bdev, bytenr, bytenr + len - 1);
+ if (ret)
+ btrfs_warn(fs_info, "error clearing superblock number %d (%d)",
+ copy_num, ret);
+}
+
void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
struct block_device *bdev,
const char *device_path)
{
- struct btrfs_super_block *disk_super;
int copy_num;
if (!bdev)
return;
for (copy_num = 0; copy_num < BTRFS_SUPER_MIRROR_MAX; copy_num++) {
- struct page *page;
- int ret;
-
- disk_super = btrfs_read_dev_one_super(bdev, copy_num, false);
- if (IS_ERR(disk_super))
- continue;
-
- if (bdev_is_zoned(bdev)) {
+ if (bdev_is_zoned(bdev))
btrfs_reset_sb_log_zones(bdev, copy_num);
- continue;
- }
-
- memset(&disk_super->magic, 0, sizeof(disk_super->magic));
-
- page = virt_to_page(disk_super);
- set_page_dirty(page);
- lock_page(page);
- /* write_on_page() unlocks the page */
- ret = write_one_page(page);
- if (ret)
- btrfs_warn(fs_info,
- "error clearing superblock number %d (%d)",
- copy_num, ret);
- btrfs_release_disk_super(disk_super);
-
+ else
+ btrfs_scratch_superblock(fs_info, bdev, copy_num);
}
/* Notify udev that device has changed */
@@ -6257,91 +6284,42 @@ static bool need_full_stripe(enum btrfs_map_op op)
return (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS);
}
-/*
- * Calculate the geometry of a particular (address, len) tuple. This
- * information is used to calculate how big a particular bio can get before it
- * straddles a stripe.
- *
- * @fs_info: the filesystem
- * @em: mapping containing the logical extent
- * @op: type of operation - write or read
- * @logical: address that we want to figure out the geometry of
- * @io_geom: pointer used to return values
- *
- * Returns < 0 in case a chunk for the given logical address cannot be found,
- * usually shouldn't happen unless @logical is corrupted, 0 otherwise.
- */
-int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, struct extent_map *em,
- enum btrfs_map_op op, u64 logical,
- struct btrfs_io_geometry *io_geom)
+static u64 btrfs_max_io_len(struct map_lookup *map, enum btrfs_map_op op,
+ u64 offset, u64 *stripe_nr, u64 *stripe_offset,
+ u64 *full_stripe_start)
{
- struct map_lookup *map;
- u64 len;
- u64 offset;
- u64 stripe_offset;
- u64 stripe_nr;
- u32 stripe_len;
- u64 raid56_full_stripe_start = (u64)-1;
- int data_stripes;
+ u32 stripe_len = map->stripe_len;
ASSERT(op != BTRFS_MAP_DISCARD);
- map = em->map_lookup;
- /* Offset of this logical address in the chunk */
- offset = logical - em->start;
- /* Len of a stripe in a chunk */
- stripe_len = map->stripe_len;
/*
- * Stripe_nr is where this block falls in
- * stripe_offset is the offset of this block in its stripe.
+ * Stripe_nr is the stripe where this block falls. stripe_offset is
+ * the offset of this block in its stripe.
*/
- stripe_nr = div64_u64_rem(offset, stripe_len, &stripe_offset);
- ASSERT(stripe_offset < U32_MAX);
+ *stripe_nr = div64_u64_rem(offset, stripe_len, stripe_offset);
+ ASSERT(*stripe_offset < U32_MAX);
- data_stripes = nr_data_stripes(map);
+ if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
+ unsigned long full_stripe_len = stripe_len * nr_data_stripes(map);
- /* Only stripe based profiles needs to check against stripe length. */
- if (map->type & BTRFS_BLOCK_GROUP_STRIPE_MASK) {
- u64 max_len = stripe_len - stripe_offset;
+ *full_stripe_start =
+ div64_u64(offset, full_stripe_len) * full_stripe_len;
/*
- * In case of raid56, we need to know the stripe aligned start
+ * For writes to RAID56, allow to write a full stripe set, but
+ * no straddling of stripe sets.
*/
- if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
- unsigned long full_stripe_len = stripe_len * data_stripes;
- raid56_full_stripe_start = offset;
-
- /*
- * Allow a write of a full stripe, but make sure we
- * don't allow straddling of stripes
- */
- raid56_full_stripe_start = div64_u64(raid56_full_stripe_start,
- full_stripe_len);
- raid56_full_stripe_start *= full_stripe_len;
-
- /*
- * For writes to RAID[56], allow a full stripeset across
- * all disks. For other RAID types and for RAID[56]
- * reads, just allow a single stripe (on a single disk).
- */
- if (op == BTRFS_MAP_WRITE) {
- max_len = stripe_len * data_stripes -
- (offset - raid56_full_stripe_start);
- }
- }
- len = min_t(u64, em->len - offset, max_len);
- } else {
- len = em->len - offset;
+ if (op == BTRFS_MAP_WRITE)
+ return full_stripe_len - (offset - *full_stripe_start);
}
- io_geom->len = len;
- io_geom->offset = offset;
- io_geom->stripe_len = stripe_len;
- io_geom->stripe_nr = stripe_nr;
- io_geom->stripe_offset = stripe_offset;
- io_geom->raid56_stripe_offset = raid56_full_stripe_start;
-
- return 0;
+ /*
+ * For other RAID types and for RAID56 reads, allow a single stripe (on
+ * a single disk).
+ */
+ if (map->type & BTRFS_BLOCK_GROUP_STRIPE_MASK)
+ return stripe_len - *stripe_offset;
+ return U64_MAX;
}
static void set_io_stripe(struct btrfs_io_stripe *dst, const struct map_lookup *map,
@@ -6360,6 +6338,7 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
{
struct extent_map *em;
struct map_lookup *map;
+ u64 map_offset;
u64 stripe_offset;
u64 stripe_nr;
u64 stripe_len;
@@ -6378,7 +6357,7 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
int patch_the_first_stripe_for_dev_replace = 0;
u64 physical_to_patch_in_first_stripe = 0;
u64 raid56_full_stripe_start = (u64)-1;
- struct btrfs_io_geometry geom;
+ u64 max_len;
ASSERT(bioc_ret);
ASSERT(op != BTRFS_MAP_DISCARD);
@@ -6386,18 +6365,14 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
em = btrfs_get_chunk_map(fs_info, logical, *length);
ASSERT(!IS_ERR(em));
- ret = btrfs_get_io_geometry(fs_info, em, op, logical, &geom);
- if (ret < 0)
- return ret;
-
map = em->map_lookup;
-
- *length = geom.len;
- stripe_len = geom.stripe_len;
- stripe_nr = geom.stripe_nr;
- stripe_offset = geom.stripe_offset;
- raid56_full_stripe_start = geom.raid56_stripe_offset;
data_stripes = nr_data_stripes(map);
+ stripe_len = map->stripe_len;
+
+ map_offset = logical - em->start;
+ max_len = btrfs_max_io_len(map, op, map_offset, &stripe_nr,
+ &stripe_offset, &raid56_full_stripe_start);
+ *length = min_t(u64, em->len - map_offset, max_len);
down_read(&dev_replace->rwsem);
dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 6b7a05f6cf82..7e51f2238f72 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -53,21 +53,6 @@ enum btrfs_raid_types {
BTRFS_NR_RAID_TYPES
};
-struct btrfs_io_geometry {
- /* remaining bytes before crossing a stripe */
- u64 len;
- /* offset of logical address in chunk */
- u64 offset;
- /* length of single IO stripe */
- u32 stripe_len;
- /* offset of address in stripe */
- u32 stripe_offset;
- /* number of stripe where address falls */
- u64 stripe_nr;
- /* offset of raid56 stripe into the chunk */
- u64 raid56_stripe_offset;
-};
-
/*
* Use sequence counter to get consistent device stat data on
* 32-bit processors.
@@ -545,9 +530,6 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info,
u64 logical, u64 *length_ret,
u32 *num_stripes);
-int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, struct extent_map *map,
- enum btrfs_map_op op, u64 logical,
- struct btrfs_io_geometry *io_geom);
int btrfs_read_sys_array(struct btrfs_fs_info *fs_info);
int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 0ed4b119a7ca..0ebeaf4e81f9 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -370,7 +370,7 @@ static int btrfs_xattr_handler_get(const struct xattr_handler *handler,
}
static int btrfs_xattr_handler_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *name, const void *buffer,
size_t size, int flags)
@@ -383,7 +383,7 @@ static int btrfs_xattr_handler_set(const struct xattr_handler *handler,
}
static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index 01a13de11832..da7bb9187b68 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -63,7 +63,7 @@ struct list_head *zlib_alloc_workspace(unsigned int level)
workspacesize = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
zlib_inflate_workspacesize());
- workspace->strm.workspace = kvmalloc(workspacesize, GFP_KERNEL);
+ workspace->strm.workspace = kvzalloc(workspacesize, GFP_KERNEL);
workspace->level = level;
workspace->buf = NULL;
/*
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index a759668477bb..f95b2c94d619 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -17,6 +17,7 @@
#include "space-info.h"
#include "fs.h"
#include "accessors.h"
+#include "bio.h"
/* Maximum number of zones to report per blkdev_report_zones() call */
#define BTRFS_REPORT_NR_ZONES 4096
@@ -160,7 +161,7 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones,
*/
static inline u32 sb_zone_number(int shift, int mirror)
{
- u64 zone;
+ u64 zone = U64_MAX;
ASSERT(mirror < BTRFS_SUPER_MIRROR_MAX);
switch (mirror) {
@@ -220,7 +221,6 @@ static int btrfs_get_dev_zones(struct btrfs_device *device, u64 pos,
struct blk_zone *zones, unsigned int *nr_zones)
{
struct btrfs_zoned_device_info *zinfo = device->zone_info;
- u32 zno;
int ret;
if (!*nr_zones)
@@ -235,6 +235,7 @@ static int btrfs_get_dev_zones(struct btrfs_device *device, u64 pos,
/* Check cache */
if (zinfo->zone_cache) {
unsigned int i;
+ u32 zno;
ASSERT(IS_ALIGNED(pos, zinfo->zone_size));
zno = pos >> zinfo->zone_size_shift;
@@ -274,9 +275,12 @@ static int btrfs_get_dev_zones(struct btrfs_device *device, u64 pos,
return -EIO;
/* Populate cache */
- if (zinfo->zone_cache)
+ if (zinfo->zone_cache) {
+ u32 zno = pos >> zinfo->zone_size_shift;
+
memcpy(zinfo->zone_cache + zno, zones,
sizeof(*zinfo->zone_cache) * *nr_zones);
+ }
return 0;
}
@@ -417,25 +421,6 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
nr_sectors = bdev_nr_sectors(bdev);
zone_info->zone_size_shift = ilog2(zone_info->zone_size);
zone_info->nr_zones = nr_sectors >> ilog2(zone_sectors);
- /*
- * We limit max_zone_append_size also by max_segments *
- * PAGE_SIZE. Technically, we can have multiple pages per segment. But,
- * since btrfs adds the pages one by one to a bio, and btrfs cannot
- * increase the metadata reservation even if it increases the number of
- * extents, it is safe to stick with the limit.
- *
- * With the zoned emulation, we can have non-zoned device on the zoned
- * mode. In this case, we don't have a valid max zone append size. So,
- * use max_segments * PAGE_SIZE as the pseudo max_zone_append_size.
- */
- if (bdev_is_zoned(bdev)) {
- zone_info->max_zone_append_size = min_t(u64,
- (u64)bdev_max_zone_append_sectors(bdev) << SECTOR_SHIFT,
- (u64)bdev_max_segments(bdev) << PAGE_SHIFT);
- } else {
- zone_info->max_zone_append_size =
- (u64)bdev_max_segments(bdev) << PAGE_SHIFT;
- }
if (!IS_ALIGNED(nr_sectors, zone_sectors))
zone_info->nr_zones++;
@@ -539,6 +524,8 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
}
atomic_set(&zone_info->active_zones_left,
max_active_zones - nactive);
+ /* Overcommit does not work well with active zone tacking. */
+ set_bit(BTRFS_FS_NO_OVERCOMMIT, &fs_info->flags);
}
/* Validate superblock log */
@@ -713,9 +700,9 @@ static int btrfs_check_for_zoned_device(struct btrfs_fs_info *fs_info)
int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
{
+ struct queue_limits *lim = &fs_info->limits;
struct btrfs_device *device;
u64 zone_size = 0;
- u64 max_zone_append_size = 0;
int ret;
/*
@@ -725,6 +712,8 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
if (!btrfs_fs_incompat(fs_info, ZONED))
return btrfs_check_for_zoned_device(fs_info);
+ blk_set_stacking_limits(lim);
+
list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
struct btrfs_zoned_device_info *zone_info = device->zone_info;
@@ -739,10 +728,17 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
zone_info->zone_size, zone_size);
return -EINVAL;
}
- if (!max_zone_append_size ||
- (zone_info->max_zone_append_size &&
- zone_info->max_zone_append_size < max_zone_append_size))
- max_zone_append_size = zone_info->max_zone_append_size;
+
+ /*
+ * With the zoned emulation, we can have non-zoned device on the
+ * zoned mode. In this case, we don't have a valid max zone
+ * append size.
+ */
+ if (bdev_is_zoned(device->bdev)) {
+ blk_stack_limits(lim,
+ &bdev_get_queue(device->bdev)->limits,
+ 0);
+ }
}
/*
@@ -763,8 +759,18 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
}
fs_info->zone_size = zone_size;
- fs_info->max_zone_append_size = ALIGN_DOWN(max_zone_append_size,
- fs_info->sectorsize);
+ /*
+ * Also limit max_zone_append_size by max_segments * PAGE_SIZE.
+ * Technically, we can have multiple pages per segment. But, since
+ * we add the pages one by one to a bio, and cannot increase the
+ * metadata reservation even if it increases the number of extents, it
+ * is safe to stick with the limit.
+ */
+ fs_info->max_zone_append_size = ALIGN_DOWN(
+ min3((u64)lim->max_zone_append_sectors << SECTOR_SHIFT,
+ (u64)lim->max_sectors << SECTOR_SHIFT,
+ (u64)lim->max_segments << PAGE_SHIFT),
+ fs_info->sectorsize);
fs_info->fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_ZONED;
if (fs_info->max_zone_append_size < fs_info->max_extent_size)
fs_info->max_extent_size = fs_info->max_zone_append_size;
@@ -1621,8 +1627,10 @@ void btrfs_free_redirty_list(struct btrfs_transaction *trans)
spin_unlock(&trans->releasing_ebs_lock);
}
-bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start)
+bool btrfs_use_zone_append(struct btrfs_bio *bbio)
{
+ u64 start = (bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT);
+ struct btrfs_inode *inode = bbio->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_block_group *cache;
bool ret = false;
@@ -1633,6 +1641,9 @@ bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start)
if (!is_data_inode(&inode->vfs_inode))
return false;
+ if (btrfs_op(&bbio->bio) != BTRFS_MAP_WRITE)
+ return false;
+
/*
* Using REQ_OP_ZONE_APPNED for relocation can break assumptions on the
* extent layout the relocation code has.
@@ -1655,22 +1666,16 @@ bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start)
return ret;
}
-void btrfs_record_physical_zoned(struct inode *inode, u64 file_offset,
- struct bio *bio)
+void btrfs_record_physical_zoned(struct btrfs_bio *bbio)
{
+ const u64 physical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
struct btrfs_ordered_extent *ordered;
- const u64 physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
- if (bio_op(bio) != REQ_OP_ZONE_APPEND)
- return;
-
- ordered = btrfs_lookup_ordered_extent(BTRFS_I(inode), file_offset);
+ ordered = btrfs_lookup_ordered_extent(bbio->inode, bbio->file_offset);
if (WARN_ON(!ordered))
return;
ordered->physical = physical;
- ordered->bdev = bio->bi_bdev;
-
btrfs_put_ordered_extent(ordered);
}
@@ -1682,43 +1687,46 @@ void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered)
struct extent_map *em;
struct btrfs_ordered_sum *sum;
u64 orig_logical = ordered->disk_bytenr;
- u64 *logical = NULL;
- int nr, stripe_len;
+ struct map_lookup *map;
+ u64 physical = ordered->physical;
+ u64 chunk_start_phys;
+ u64 logical;
- /* Zoned devices should not have partitions. So, we can assume it is 0 */
- ASSERT(!bdev_is_partition(ordered->bdev));
- if (WARN_ON(!ordered->bdev))
+ em = btrfs_get_chunk_map(fs_info, orig_logical, 1);
+ if (IS_ERR(em))
return;
+ map = em->map_lookup;
+ chunk_start_phys = map->stripes[0].physical;
- if (WARN_ON(btrfs_rmap_block(fs_info, orig_logical, ordered->bdev,
- ordered->physical, &logical, &nr,
- &stripe_len)))
- goto out;
-
- WARN_ON(nr != 1);
+ if (WARN_ON_ONCE(map->num_stripes > 1) ||
+ WARN_ON_ONCE((map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0) ||
+ WARN_ON_ONCE(physical < chunk_start_phys) ||
+ WARN_ON_ONCE(physical > chunk_start_phys + em->orig_block_len)) {
+ free_extent_map(em);
+ return;
+ }
+ logical = em->start + (physical - map->stripes[0].physical);
+ free_extent_map(em);
- if (orig_logical == *logical)
- goto out;
+ if (orig_logical == logical)
+ return;
- ordered->disk_bytenr = *logical;
+ ordered->disk_bytenr = logical;
em_tree = &inode->extent_tree;
write_lock(&em_tree->lock);
em = search_extent_mapping(em_tree, ordered->file_offset,
ordered->num_bytes);
- em->block_start = *logical;
+ em->block_start = logical;
free_extent_map(em);
write_unlock(&em_tree->lock);
list_for_each_entry(sum, &ordered->list, list) {
- if (*logical < orig_logical)
- sum->bytenr -= orig_logical - *logical;
+ if (logical < orig_logical)
+ sum->bytenr -= orig_logical - logical;
else
- sum->bytenr += *logical - orig_logical;
+ sum->bytenr += logical - orig_logical;
}
-
-out:
- kfree(logical);
}
bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
@@ -1843,26 +1851,6 @@ int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical,
return btrfs_zoned_issue_zeroout(tgt_dev, physical_pos, length);
}
-struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info,
- u64 logical, u64 length)
-{
- struct btrfs_device *device;
- struct extent_map *em;
- struct map_lookup *map;
-
- em = btrfs_get_chunk_map(fs_info, logical, length);
- if (IS_ERR(em))
- return ERR_CAST(em);
-
- map = em->map_lookup;
- /* We only support single profile for now */
- device = map->stripes[0].dev;
-
- free_extent_map(em);
-
- return device;
-}
-
/*
* Activate block group and underlying device zones
*
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index f43990985d80..c0570d35fea2 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -20,7 +20,6 @@ struct btrfs_zoned_device_info {
*/
u64 zone_size;
u8 zone_size_shift;
- u64 max_zone_append_size;
u32 nr_zones;
unsigned int max_active_zones;
atomic_t active_zones_left;
@@ -56,9 +55,8 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache);
void btrfs_redirty_list_add(struct btrfs_transaction *trans,
struct extent_buffer *eb);
void btrfs_free_redirty_list(struct btrfs_transaction *trans);
-bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start);
-void btrfs_record_physical_zoned(struct inode *inode, u64 file_offset,
- struct bio *bio);
+bool btrfs_use_zone_append(struct btrfs_bio *bbio);
+void btrfs_record_physical_zoned(struct btrfs_bio *bbio);
void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered);
bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb,
@@ -68,8 +66,6 @@ void btrfs_revert_meta_write_pointer(struct btrfs_block_group *cache,
int btrfs_zoned_issue_zeroout(struct btrfs_device *device, u64 physical, u64 length);
int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical,
u64 physical_start, u64 physical_pos);
-struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info,
- u64 logical, u64 length);
bool btrfs_zone_activate(struct btrfs_block_group *block_group);
int btrfs_zone_finish(struct btrfs_block_group *block_group);
bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags);
@@ -185,13 +181,12 @@ static inline void btrfs_redirty_list_add(struct btrfs_transaction *trans,
struct extent_buffer *eb) { }
static inline void btrfs_free_redirty_list(struct btrfs_transaction *trans) { }
-static inline bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start)
+static inline bool btrfs_use_zone_append(struct btrfs_bio *bbio)
{
return false;
}
-static inline void btrfs_record_physical_zoned(struct inode *inode,
- u64 file_offset, struct bio *bio)
+static inline void btrfs_record_physical_zoned(struct btrfs_bio *bbio)
{
}
@@ -224,13 +219,6 @@ static inline int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev,
return -EOPNOTSUPP;
}
-static inline struct btrfs_device *btrfs_zoned_get_device(
- struct btrfs_fs_info *fs_info,
- u64 logical, u64 length)
-{
- return ERR_PTR(-EOPNOTSUPP);
-}
-
static inline bool btrfs_zone_activate(struct btrfs_block_group *block_group)
{
return true;
diff --git a/fs/buffer.c b/fs/buffer.c
index d9c6d1fbb6dd..623e77d6ef77 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -48,6 +48,7 @@
#include <linux/sched/mm.h>
#include <trace/events/block.h>
#include <linux/fscrypt.h>
+#include <linux/fsverity.h>
#include "internal.h"
@@ -295,20 +296,53 @@ still_busy:
return;
}
-struct decrypt_bh_ctx {
+struct postprocess_bh_ctx {
struct work_struct work;
struct buffer_head *bh;
};
+static void verify_bh(struct work_struct *work)
+{
+ struct postprocess_bh_ctx *ctx =
+ container_of(work, struct postprocess_bh_ctx, work);
+ struct buffer_head *bh = ctx->bh;
+ bool valid;
+
+ valid = fsverity_verify_blocks(page_folio(bh->b_page), bh->b_size,
+ bh_offset(bh));
+ end_buffer_async_read(bh, valid);
+ kfree(ctx);
+}
+
+static bool need_fsverity(struct buffer_head *bh)
+{
+ struct page *page = bh->b_page;
+ struct inode *inode = page->mapping->host;
+
+ return fsverity_active(inode) &&
+ /* needed by ext4 */
+ page->index < DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
+}
+
static void decrypt_bh(struct work_struct *work)
{
- struct decrypt_bh_ctx *ctx =
- container_of(work, struct decrypt_bh_ctx, work);
+ struct postprocess_bh_ctx *ctx =
+ container_of(work, struct postprocess_bh_ctx, work);
struct buffer_head *bh = ctx->bh;
int err;
- err = fscrypt_decrypt_pagecache_blocks(bh->b_page, bh->b_size,
- bh_offset(bh));
+ err = fscrypt_decrypt_pagecache_blocks(page_folio(bh->b_page),
+ bh->b_size, bh_offset(bh));
+ if (err == 0 && need_fsverity(bh)) {
+ /*
+ * We use different work queues for decryption and for verity
+ * because verity may require reading metadata pages that need
+ * decryption, and we shouldn't recurse to the same workqueue.
+ */
+ INIT_WORK(&ctx->work, verify_bh);
+ fsverity_enqueue_verify_work(&ctx->work);
+ return;
+ }
end_buffer_async_read(bh, err == 0);
kfree(ctx);
}
@@ -319,15 +353,24 @@ static void decrypt_bh(struct work_struct *work)
*/
static void end_buffer_async_read_io(struct buffer_head *bh, int uptodate)
{
- /* Decrypt if needed */
- if (uptodate &&
- fscrypt_inode_uses_fs_layer_crypto(bh->b_page->mapping->host)) {
- struct decrypt_bh_ctx *ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
+ struct inode *inode = bh->b_page->mapping->host;
+ bool decrypt = fscrypt_inode_uses_fs_layer_crypto(inode);
+ bool verify = need_fsverity(bh);
+
+ /* Decrypt (with fscrypt) and/or verify (with fsverity) if needed. */
+ if (uptodate && (decrypt || verify)) {
+ struct postprocess_bh_ctx *ctx =
+ kmalloc(sizeof(*ctx), GFP_ATOMIC);
if (ctx) {
- INIT_WORK(&ctx->work, decrypt_bh);
ctx->bh = bh;
- fscrypt_enqueue_decrypt_work(&ctx->work);
+ if (decrypt) {
+ INIT_WORK(&ctx->work, decrypt_bh);
+ fscrypt_enqueue_decrypt_work(&ctx->work);
+ } else {
+ INIT_WORK(&ctx->work, verify_bh);
+ fsverity_enqueue_verify_work(&ctx->work);
+ }
return;
}
uptodate = 0;
@@ -2245,6 +2288,11 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block)
int nr, i;
int fully_mapped = 1;
bool page_error = false;
+ loff_t limit = i_size_read(inode);
+
+ /* This is needed for ext4. */
+ if (IS_ENABLED(CONFIG_FS_VERITY) && IS_VERITY(inode))
+ limit = inode->i_sb->s_maxbytes;
VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
@@ -2253,7 +2301,7 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block)
bbits = block_size_bits(blocksize);
iblock = (sector_t)folio->index << (PAGE_SHIFT - bbits);
- lblock = (i_size_read(inode)+blocksize-1) >> bbits;
+ lblock = (limit+blocksize-1) >> bbits;
bh = head;
nr = 0;
i = 0;
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index a69073a1d3f0..40052bdb3365 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -138,7 +138,7 @@ static int cachefiles_adjust_size(struct cachefiles_object *object)
newattrs.ia_size = oi_size & PAGE_MASK;
ret = cachefiles_inject_remove_error();
if (ret == 0)
- ret = notify_change(&init_user_ns, file->f_path.dentry,
+ ret = notify_change(&nop_mnt_idmap, file->f_path.dentry,
&newattrs, NULL);
if (ret < 0)
goto truncate_failed;
@@ -148,7 +148,7 @@ static int cachefiles_adjust_size(struct cachefiles_object *object)
newattrs.ia_size = ni_size;
ret = cachefiles_inject_write_error();
if (ret == 0)
- ret = notify_change(&init_user_ns, file->f_path.dentry,
+ ret = notify_change(&nop_mnt_idmap, file->f_path.dentry,
&newattrs, NULL);
truncate_failed:
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 03ca8f2f657a..82219a8f6084 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -130,7 +130,7 @@ retry:
goto mkdir_error;
ret = cachefiles_inject_write_error();
if (ret == 0)
- ret = vfs_mkdir(&init_user_ns, d_inode(dir), subdir, 0700);
+ ret = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700);
if (ret < 0) {
trace_cachefiles_vfs_error(NULL, d_inode(dir), ret,
cachefiles_trace_mkdir_error);
@@ -245,7 +245,7 @@ static int cachefiles_unlink(struct cachefiles_cache *cache,
ret = cachefiles_inject_remove_error();
if (ret == 0) {
- ret = vfs_unlink(&init_user_ns, d_backing_inode(dir), dentry, NULL);
+ ret = vfs_unlink(&nop_mnt_idmap, d_backing_inode(dir), dentry, NULL);
if (ret == -EIO)
cachefiles_io_error(cache, "Unlink failed");
}
@@ -382,10 +382,10 @@ try_again:
cachefiles_io_error(cache, "Rename security error %d", ret);
} else {
struct renamedata rd = {
- .old_mnt_userns = &init_user_ns,
+ .old_mnt_idmap = &nop_mnt_idmap,
.old_dir = d_inode(dir),
.old_dentry = rep,
- .new_mnt_userns = &init_user_ns,
+ .new_mnt_idmap = &nop_mnt_idmap,
.new_dir = d_inode(cache->graveyard),
.new_dentry = grave,
};
@@ -451,7 +451,7 @@ struct file *cachefiles_create_tmpfile(struct cachefiles_object *object)
ret = cachefiles_inject_write_error();
if (ret == 0) {
- file = vfs_tmpfile_open(&init_user_ns, &parentpath, S_IFREG,
+ file = vfs_tmpfile_open(&nop_mnt_idmap, &parentpath, S_IFREG,
O_RDWR | O_LARGEFILE | O_DIRECT,
cache->cache_cred);
ret = PTR_ERR_OR_ZERO(file);
@@ -714,7 +714,7 @@ bool cachefiles_commit_tmpfile(struct cachefiles_cache *cache,
ret = cachefiles_inject_read_error();
if (ret == 0)
- ret = vfs_link(object->file->f_path.dentry, &init_user_ns,
+ ret = vfs_link(object->file->f_path.dentry, &nop_mnt_idmap,
d_inode(fan), dentry, NULL);
if (ret < 0) {
trace_cachefiles_vfs_error(object, d_inode(fan), ret,
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c
index 00b087c14995..bcb6173943ee 100644
--- a/fs/cachefiles/xattr.c
+++ b/fs/cachefiles/xattr.c
@@ -65,7 +65,7 @@ int cachefiles_set_object_xattr(struct cachefiles_object *object)
ret = cachefiles_inject_write_error();
if (ret == 0)
- ret = vfs_setxattr(&init_user_ns, dentry, cachefiles_xattr_cache,
+ ret = vfs_setxattr(&nop_mnt_idmap, dentry, cachefiles_xattr_cache,
buf, sizeof(struct cachefiles_xattr) + len, 0);
if (ret < 0) {
trace_cachefiles_vfs_error(object, file_inode(file), ret,
@@ -108,7 +108,7 @@ int cachefiles_check_auxdata(struct cachefiles_object *object, struct file *file
xlen = cachefiles_inject_read_error();
if (xlen == 0)
- xlen = vfs_getxattr(&init_user_ns, dentry, cachefiles_xattr_cache, buf, tlen);
+ xlen = vfs_getxattr(&nop_mnt_idmap, dentry, cachefiles_xattr_cache, buf, tlen);
if (xlen != tlen) {
if (xlen < 0)
trace_cachefiles_vfs_error(object, file_inode(file), xlen,
@@ -150,7 +150,7 @@ int cachefiles_remove_object_xattr(struct cachefiles_cache *cache,
ret = cachefiles_inject_remove_error();
if (ret == 0)
- ret = vfs_removexattr(&init_user_ns, dentry, cachefiles_xattr_cache);
+ ret = vfs_removexattr(&nop_mnt_idmap, dentry, cachefiles_xattr_cache);
if (ret < 0) {
trace_cachefiles_vfs_error(object, d_inode(dentry), ret,
cachefiles_trace_remxattr_error);
@@ -207,7 +207,7 @@ bool cachefiles_set_volume_xattr(struct cachefiles_volume *volume)
ret = cachefiles_inject_write_error();
if (ret == 0)
- ret = vfs_setxattr(&init_user_ns, dentry, cachefiles_xattr_cache,
+ ret = vfs_setxattr(&nop_mnt_idmap, dentry, cachefiles_xattr_cache,
buf, len, 0);
if (ret < 0) {
trace_cachefiles_vfs_error(NULL, d_inode(dentry), ret,
@@ -249,7 +249,7 @@ int cachefiles_check_volume_xattr(struct cachefiles_volume *volume)
xlen = cachefiles_inject_read_error();
if (xlen == 0)
- xlen = vfs_getxattr(&init_user_ns, dentry, cachefiles_xattr_cache, buf, len);
+ xlen = vfs_getxattr(&nop_mnt_idmap, dentry, cachefiles_xattr_cache, buf, len);
if (xlen != len) {
if (xlen < 0) {
trace_cachefiles_vfs_error(NULL, d_inode(dentry), xlen,
diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
index c7e8dd5b58d4..6945a938d396 100644
--- a/fs/ceph/acl.c
+++ b/fs/ceph/acl.c
@@ -85,7 +85,7 @@ retry:
return acl;
}
-int ceph_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+int ceph_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type)
{
int ret = 0, size = 0;
@@ -105,7 +105,7 @@ int ceph_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
case ACL_TYPE_ACCESS:
name = XATTR_NAME_POSIX_ACL_ACCESS;
if (acl) {
- ret = posix_acl_update_mode(&init_user_ns, inode,
+ ret = posix_acl_update_mode(&nop_mnt_idmap, inode,
&new_mode, &acl);
if (ret)
goto out;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 8c74871e37c9..cac4083e387a 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -305,7 +305,7 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
struct inode *inode = rreq->inode;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
- struct ceph_osd_request *req;
+ struct ceph_osd_request *req = NULL;
struct ceph_vino vino = ceph_vino(inode);
struct iov_iter iter;
struct page **pages;
@@ -313,6 +313,11 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
int err = 0;
u64 len = subreq->len;
+ if (ceph_inode_is_shutdown(inode)) {
+ err = -EIO;
+ goto out;
+ }
+
if (ceph_has_inline_data(ci) && ceph_netfs_issue_op_inline(subreq))
return;
@@ -563,6 +568,9 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
dout("writepage %p idx %lu\n", page, page->index);
+ if (ceph_inode_is_shutdown(inode))
+ return -EIO;
+
/* verify this is a writeable snap context */
snapc = page_snap_context(page);
if (!snapc) {
@@ -1643,7 +1651,7 @@ int ceph_uninline_data(struct file *file)
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_osd_request *req = NULL;
- struct ceph_cap_flush *prealloc_cf;
+ struct ceph_cap_flush *prealloc_cf = NULL;
struct folio *folio = NULL;
u64 inline_version = CEPH_INLINE_NONE;
struct page *pages[1];
@@ -1657,6 +1665,11 @@ int ceph_uninline_data(struct file *file)
dout("uninline_data %p %llx.%llx inline_version %llu\n",
inode, ceph_vinop(inode), inline_version);
+ if (ceph_inode_is_shutdown(inode)) {
+ err = -EIO;
+ goto out;
+ }
+
if (inline_version == CEPH_INLINE_NONE)
return 0;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 4b159f97fe7b..7cc20772eac9 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -9,6 +9,7 @@
#include <linux/wait.h>
#include <linux/writeback.h>
#include <linux/iversion.h>
+#include <linux/filelock.h>
#include "super.h"
#include "mds_client.h"
@@ -2913,7 +2914,7 @@ int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got
while (true) {
flags &= CEPH_FILE_MODE_MASK;
- if (atomic_read(&fi->num_locks))
+ if (vfs_inode_has_locks(inode))
flags |= CHECK_FILELOCK;
_got = 0;
ret = try_get_cap_refs(inode, need, want, endoff,
@@ -4078,6 +4079,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
void *p, *end;
struct cap_extra_info extra_info = {};
bool queue_trunc;
+ bool close_sessions = false;
dout("handle_caps from mds%d\n", session->s_mds);
@@ -4215,9 +4217,13 @@ void ceph_handle_caps(struct ceph_mds_session *session,
realm = NULL;
if (snaptrace_len) {
down_write(&mdsc->snap_rwsem);
- ceph_update_snap_trace(mdsc, snaptrace,
- snaptrace + snaptrace_len,
- false, &realm);
+ if (ceph_update_snap_trace(mdsc, snaptrace,
+ snaptrace + snaptrace_len,
+ false, &realm)) {
+ up_write(&mdsc->snap_rwsem);
+ close_sessions = true;
+ goto done;
+ }
downgrade_write(&mdsc->snap_rwsem);
} else {
down_read(&mdsc->snap_rwsem);
@@ -4277,6 +4283,11 @@ done_unlocked:
iput(inode);
out:
ceph_put_string(extra_info.pool_ns);
+
+ /* Defer closing the sessions after s_mutex lock being released */
+ if (close_sessions)
+ ceph_mdsc_close_sessions(mdsc);
+
return;
flush_cap_releases:
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 6c7026cc8988..0ced8b570e42 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -845,7 +845,7 @@ int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry)
return PTR_ERR(result);
}
-static int ceph_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+static int ceph_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
@@ -905,13 +905,13 @@ out:
return err;
}
-static int ceph_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int ceph_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
- return ceph_mknod(mnt_userns, dir, dentry, mode, 0);
+ return ceph_mknod(idmap, dir, dentry, mode, 0);
}
-static int ceph_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+static int ceph_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *dest)
{
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
@@ -970,7 +970,7 @@ out:
return err;
}
-static int ceph_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
@@ -1269,7 +1269,7 @@ out:
return err;
}
-static int ceph_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+static int ceph_rename(struct mnt_idmap *idmap, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags)
{
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 764598e1efd9..b5cff85925a1 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -2011,6 +2011,9 @@ static int ceph_zero_partial_object(struct inode *inode,
loff_t zero = 0;
int op;
+ if (ceph_inode_is_shutdown(inode))
+ return -EIO;
+
if (!length) {
op = offset ? CEPH_OSD_OP_DELETE : CEPH_OSD_OP_TRUNCATE;
length = &zero;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 23d05ec87fcc..8e5f41d45283 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -2227,7 +2227,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
/*
* setattr
*/
-int ceph_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int ceph_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
@@ -2240,7 +2240,7 @@ int ceph_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
if (ceph_inode_is_shutdown(inode))
return -ESTALE;
- err = setattr_prepare(&init_user_ns, dentry, attr);
+ err = setattr_prepare(&nop_mnt_idmap, dentry, attr);
if (err != 0)
return err;
@@ -2255,7 +2255,7 @@ int ceph_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
err = __ceph_setattr(inode, attr);
if (err >= 0 && (attr->ia_valid & ATTR_MODE))
- err = posix_acl_chmod(&init_user_ns, dentry, attr->ia_mode);
+ err = posix_acl_chmod(&nop_mnt_idmap, dentry, attr->ia_mode);
return err;
}
@@ -2397,7 +2397,7 @@ out:
* Check inode permissions. We verify we have a valid value for
* the AUTH cap, then call the generic handler.
*/
-int ceph_permission(struct user_namespace *mnt_userns, struct inode *inode,
+int ceph_permission(struct mnt_idmap *idmap, struct inode *inode,
int mask)
{
int err;
@@ -2408,7 +2408,7 @@ int ceph_permission(struct user_namespace *mnt_userns, struct inode *inode,
err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED, false);
if (!err)
- err = generic_permission(&init_user_ns, inode, mask);
+ err = generic_permission(&nop_mnt_idmap, inode, mask);
return err;
}
@@ -2417,10 +2417,10 @@ static int statx_to_caps(u32 want, umode_t mode)
{
int mask = 0;
- if (want & (STATX_MODE|STATX_UID|STATX_GID|STATX_CTIME|STATX_BTIME))
+ if (want & (STATX_MODE|STATX_UID|STATX_GID|STATX_CTIME|STATX_BTIME|STATX_CHANGE_COOKIE))
mask |= CEPH_CAP_AUTH_SHARED;
- if (want & (STATX_NLINK|STATX_CTIME)) {
+ if (want & (STATX_NLINK|STATX_CTIME|STATX_CHANGE_COOKIE)) {
/*
* The link count for directories depends on inode->i_subdirs,
* and that is only updated when Fs caps are held.
@@ -2431,11 +2431,10 @@ static int statx_to_caps(u32 want, umode_t mode)
mask |= CEPH_CAP_LINK_SHARED;
}
- if (want & (STATX_ATIME|STATX_MTIME|STATX_CTIME|STATX_SIZE|
- STATX_BLOCKS))
+ if (want & (STATX_ATIME|STATX_MTIME|STATX_CTIME|STATX_SIZE|STATX_BLOCKS|STATX_CHANGE_COOKIE))
mask |= CEPH_CAP_FILE_SHARED;
- if (want & (STATX_CTIME))
+ if (want & (STATX_CTIME|STATX_CHANGE_COOKIE))
mask |= CEPH_CAP_XATTR_SHARED;
return mask;
@@ -2445,7 +2444,7 @@ static int statx_to_caps(u32 want, umode_t mode)
* Get all the attributes. If we have sufficient caps for the requested attrs,
* then we can avoid talking to the MDS at all.
*/
-int ceph_getattr(struct user_namespace *mnt_userns, const struct path *path,
+int ceph_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int flags)
{
struct inode *inode = d_inode(path->dentry);
@@ -2466,7 +2465,7 @@ int ceph_getattr(struct user_namespace *mnt_userns, const struct path *path,
return err;
}
- generic_fillattr(&init_user_ns, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, inode, stat);
stat->ino = ceph_present_inode(inode);
/*
@@ -2478,6 +2477,11 @@ int ceph_getattr(struct user_namespace *mnt_userns, const struct path *path,
valid_mask |= STATX_BTIME;
}
+ if (request_mask & STATX_CHANGE_COOKIE) {
+ stat->change_cookie = inode_peek_iversion_raw(inode);
+ valid_mask |= STATX_CHANGE_COOKIE;
+ }
+
if (ceph_snap(inode) == CEPH_NOSNAP)
stat->dev = sb->s_dev;
else
@@ -2519,6 +2523,8 @@ int ceph_getattr(struct user_namespace *mnt_userns, const struct path *path,
stat->nlink = 1 + 1 + ci->i_subdirs;
}
+ stat->attributes_mask |= STATX_ATTR_CHANGE_MONOTONIC;
+ stat->attributes |= STATX_ATTR_CHANGE_MONOTONIC;
stat->result_mask = request_mask & valid_mask;
return err;
}
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index f3b461c708a8..cb51c7e9c8e2 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -7,6 +7,7 @@
#include "super.h"
#include "mds_client.h"
+#include <linux/filelock.h>
#include <linux/ceph/pagelist.h>
static u64 lock_secret;
@@ -32,24 +33,36 @@ void __init ceph_flock_init(void)
static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
{
- struct ceph_file_info *fi = dst->fl_file->private_data;
struct inode *inode = file_inode(dst->fl_file);
atomic_inc(&ceph_inode(inode)->i_filelock_ref);
- atomic_inc(&fi->num_locks);
+ dst->fl_u.ceph.inode = igrab(inode);
}
+/*
+ * Do not use the 'fl->fl_file' in release function, which
+ * is possibly already released by another thread.
+ */
static void ceph_fl_release_lock(struct file_lock *fl)
{
- struct ceph_file_info *fi = fl->fl_file->private_data;
- struct inode *inode = file_inode(fl->fl_file);
- struct ceph_inode_info *ci = ceph_inode(inode);
- atomic_dec(&fi->num_locks);
+ struct inode *inode = fl->fl_u.ceph.inode;
+ struct ceph_inode_info *ci;
+
+ /*
+ * If inode is NULL it should be a request file_lock,
+ * nothing we can do.
+ */
+ if (!inode)
+ return;
+
+ ci = ceph_inode(inode);
if (atomic_dec_and_test(&ci->i_filelock_ref)) {
/* clear error when all locks are released */
spin_lock(&ci->i_ceph_lock);
ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK;
spin_unlock(&ci->i_ceph_lock);
}
+ fl->fl_u.ceph.inode = NULL;
+ iput(inode);
}
static const struct file_lock_operations ceph_fl_lock_ops = {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 26a0a8b9975e..27a245d959c0 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -806,6 +806,9 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
{
struct ceph_mds_session *s;
+ if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_FENCE_IO)
+ return ERR_PTR(-EIO);
+
if (mds >= mdsc->mdsmap->possible_max_rank)
return ERR_PTR(-EINVAL);
@@ -1478,6 +1481,9 @@ static int __open_session(struct ceph_mds_client *mdsc,
int mstate;
int mds = session->s_mds;
+ if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_FENCE_IO)
+ return -EIO;
+
/* wait for mds to go active? */
mstate = ceph_mdsmap_get_state(mdsc->mdsmap, mds);
dout("open_session to mds%d (%s)\n", mds,
@@ -2860,6 +2866,11 @@ static void __do_request(struct ceph_mds_client *mdsc,
return;
}
+ if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_FENCE_IO) {
+ dout("do_request metadata corrupted\n");
+ err = -EIO;
+ goto finish;
+ }
if (req->r_timeout &&
time_after_eq(jiffies, req->r_started + req->r_timeout)) {
dout("do_request timed out\n");
@@ -3245,6 +3256,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
u64 tid;
int err, result;
int mds = session->s_mds;
+ bool close_sessions = false;
if (msg->front.iov_len < sizeof(*head)) {
pr_err("mdsc_handle_reply got corrupt (short) reply\n");
@@ -3351,10 +3363,17 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
realm = NULL;
if (rinfo->snapblob_len) {
down_write(&mdsc->snap_rwsem);
- ceph_update_snap_trace(mdsc, rinfo->snapblob,
+ err = ceph_update_snap_trace(mdsc, rinfo->snapblob,
rinfo->snapblob + rinfo->snapblob_len,
le32_to_cpu(head->op) == CEPH_MDS_OP_RMSNAP,
&realm);
+ if (err) {
+ up_write(&mdsc->snap_rwsem);
+ close_sessions = true;
+ if (err == -EIO)
+ ceph_msg_dump(msg);
+ goto out_err;
+ }
downgrade_write(&mdsc->snap_rwsem);
} else {
down_read(&mdsc->snap_rwsem);
@@ -3412,6 +3431,10 @@ out_err:
req->r_end_latency, err);
out:
ceph_mdsc_put_request(req);
+
+ /* Defer closing the sessions after s_mutex lock being released */
+ if (close_sessions)
+ ceph_mdsc_close_sessions(mdsc);
return;
}
@@ -3662,6 +3685,12 @@ static void handle_session(struct ceph_mds_session *session,
break;
case CEPH_SESSION_FLUSHMSG:
+ /* flush cap releases */
+ spin_lock(&session->s_cap_lock);
+ if (session->s_num_cap_releases)
+ ceph_flush_cap_releases(mdsc, session);
+ spin_unlock(&session->s_cap_lock);
+
send_flushmsg_ack(mdsc, session, seq);
break;
@@ -5011,7 +5040,7 @@ static bool done_closing_sessions(struct ceph_mds_client *mdsc, int skipped)
}
/*
- * called after sb is ro.
+ * called after sb is ro or when metadata corrupted.
*/
void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
{
@@ -5301,7 +5330,8 @@ static void mds_peer_reset(struct ceph_connection *con)
struct ceph_mds_client *mdsc = s->s_mdsc;
pr_warn("mds%d closed our session\n", s->s_mds);
- send_mds_reconnect(mdsc, s);
+ if (READ_ONCE(mdsc->fsc->mount_state) != CEPH_MOUNT_FENCE_IO)
+ send_mds_reconnect(mdsc, s);
}
static void mds_dispatch(struct ceph_connection *con, struct ceph_msg *msg)
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index e4151852184e..87007203f130 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
+#include <linux/fs.h>
#include <linux/sort.h>
#include <linux/slab.h>
#include <linux/iversion.h>
@@ -766,8 +767,10 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
struct ceph_snap_realm *realm;
struct ceph_snap_realm *first_realm = NULL;
struct ceph_snap_realm *realm_to_rebuild = NULL;
+ struct ceph_client *client = mdsc->fsc->client;
int rebuild_snapcs;
int err = -ENOMEM;
+ int ret;
LIST_HEAD(dirty_realms);
lockdep_assert_held_write(&mdsc->snap_rwsem);
@@ -884,6 +887,27 @@ fail:
if (first_realm)
ceph_put_snap_realm(mdsc, first_realm);
pr_err("%s error %d\n", __func__, err);
+
+ /*
+ * When receiving a corrupted snap trace we don't know what
+ * exactly has happened in MDS side. And we shouldn't continue
+ * writing to OSD, which may corrupt the snapshot contents.
+ *
+ * Just try to blocklist this kclient and then this kclient
+ * must be remounted to continue after the corrupted metadata
+ * fixed in the MDS side.
+ */
+ WRITE_ONCE(mdsc->fsc->mount_state, CEPH_MOUNT_FENCE_IO);
+ ret = ceph_monc_blocklist_add(&client->monc, &client->msgr.inst.addr);
+ if (ret)
+ pr_err("%s failed to blocklist %s: %d\n", __func__,
+ ceph_pr_addr(&client->msgr.inst.addr), ret);
+
+ WARN(1, "%s: %s%sdo remount to continue%s",
+ __func__, ret ? "" : ceph_pr_addr(&client->msgr.inst.addr),
+ ret ? "" : " was blocklisted, ",
+ err == -EIO ? " after corrupted snaptrace is fixed" : "");
+
return err;
}
@@ -984,6 +1008,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
__le64 *split_inos = NULL, *split_realms = NULL;
int i;
int locked_rwsem = 0;
+ bool close_sessions = false;
/* decode */
if (msg->front.iov_len < sizeof(*h))
@@ -1092,8 +1117,12 @@ skip_inode:
* update using the provided snap trace. if we are deleting a
* snap, we can avoid queueing cap_snaps.
*/
- ceph_update_snap_trace(mdsc, p, e,
- op == CEPH_SNAP_OP_DESTROY, NULL);
+ if (ceph_update_snap_trace(mdsc, p, e,
+ op == CEPH_SNAP_OP_DESTROY,
+ NULL)) {
+ close_sessions = true;
+ goto bad;
+ }
if (op == CEPH_SNAP_OP_SPLIT)
/* we took a reference when we created the realm, above */
@@ -1112,6 +1141,9 @@ bad:
out:
if (locked_rwsem)
up_write(&mdsc->snap_rwsem);
+
+ if (close_sessions)
+ ceph_mdsc_close_sessions(mdsc);
return;
}
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 30bdb391a0dc..6ecca2c6d137 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -100,6 +100,17 @@ struct ceph_mount_options {
char *mon_addr;
};
+/* mount state */
+enum {
+ CEPH_MOUNT_MOUNTING,
+ CEPH_MOUNT_MOUNTED,
+ CEPH_MOUNT_UNMOUNTING,
+ CEPH_MOUNT_UNMOUNTED,
+ CEPH_MOUNT_SHUTDOWN,
+ CEPH_MOUNT_RECOVER,
+ CEPH_MOUNT_FENCE_IO,
+};
+
#define CEPH_ASYNC_CREATE_CONFLICT_BITS 8
struct ceph_fs_client {
@@ -790,7 +801,6 @@ struct ceph_file_info {
struct list_head rw_contexts;
u32 filp_gen;
- atomic_t num_locks;
};
struct ceph_dir_file_info {
@@ -1040,12 +1050,12 @@ static inline int ceph_do_getattr(struct inode *inode, int mask, bool force)
{
return __ceph_do_getattr(inode, NULL, mask, force);
}
-extern int ceph_permission(struct user_namespace *mnt_userns,
+extern int ceph_permission(struct mnt_idmap *idmap,
struct inode *inode, int mask);
extern int __ceph_setattr(struct inode *inode, struct iattr *attr);
-extern int ceph_setattr(struct user_namespace *mnt_userns,
+extern int ceph_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *attr);
-extern int ceph_getattr(struct user_namespace *mnt_userns,
+extern int ceph_getattr(struct mnt_idmap *idmap,
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags);
void ceph_inode_shutdown(struct inode *inode);
@@ -1118,7 +1128,7 @@ void ceph_release_acl_sec_ctx(struct ceph_acl_sec_ctx *as_ctx);
#ifdef CONFIG_CEPH_FS_POSIX_ACL
struct posix_acl *ceph_get_acl(struct inode *, int, bool);
-int ceph_set_acl(struct user_namespace *mnt_userns,
+int ceph_set_acl(struct mnt_idmap *idmap,
struct dentry *dentry, struct posix_acl *acl, int type);
int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
struct ceph_acl_sec_ctx *as_ctx);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index f31350cda960..f65b07cc33a2 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -1285,7 +1285,7 @@ static int ceph_get_xattr_handler(const struct xattr_handler *handler,
}
static int ceph_set_xattr_handler(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index bbf58c2439da..9a2d390bd06f 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -1674,7 +1674,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode,
return rc;
}
-struct posix_acl *cifs_get_acl(struct user_namespace *mnt_userns,
+struct posix_acl *cifs_get_acl(struct mnt_idmap *idmap,
struct dentry *dentry, int type)
{
#if defined(CONFIG_CIFS_ALLOW_INSECURE_LEGACY) && defined(CONFIG_CIFS_POSIX)
@@ -1738,7 +1738,7 @@ out:
#endif
}
-int cifs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+int cifs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type)
{
#if defined(CONFIG_CIFS_ALLOW_INSECURE_LEGACY) && defined(CONFIG_CIFS_POSIX)
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 5db73c0f792a..cbc18b4a9cb2 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -278,6 +278,7 @@ build_avpair_blob(struct cifs_ses *ses, const struct nls_table *nls_cp)
* ( for NTLMSSP_AV_NB_DOMAIN_NAME followed by NTLMSSP_AV_EOL ) +
* unicode length of a netbios domain name
*/
+ kfree_sensitive(ses->auth_key.response);
ses->auth_key.len = size + 2 * dlen;
ses->auth_key.response = kzalloc(ses->auth_key.len, GFP_KERNEL);
if (!ses->auth_key.response) {
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 10e00c624922..cb7c5460a80b 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -12,6 +12,7 @@
#include <linux/module.h>
#include <linux/fs.h>
+#include <linux/filelock.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/init.h>
@@ -345,7 +346,7 @@ static long cifs_fallocate(struct file *file, int mode, loff_t off, loff_t len)
return -EOPNOTSUPP;
}
-static int cifs_permission(struct user_namespace *mnt_userns,
+static int cifs_permission(struct mnt_idmap *idmap,
struct inode *inode, int mask)
{
struct cifs_sb_info *cifs_sb;
@@ -361,7 +362,7 @@ static int cifs_permission(struct user_namespace *mnt_userns,
on the client (above and beyond ACL on servers) for
servers which do not support setting and viewing mode bits,
so allowing client to check permissions is useful */
- return generic_permission(&init_user_ns, inode, mask);
+ return generic_permission(&nop_mnt_idmap, inode, mask);
}
static struct kmem_cache *cifs_inode_cachep;
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 63a0ac2b9355..b58cd737b21e 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -49,7 +49,7 @@ extern void cifs_sb_deactive(struct super_block *sb);
/* Functions related to inodes */
extern const struct inode_operations cifs_dir_inode_ops;
extern struct inode *cifs_root_iget(struct super_block *);
-extern int cifs_create(struct user_namespace *, struct inode *,
+extern int cifs_create(struct mnt_idmap *, struct inode *,
struct dentry *, umode_t, bool excl);
extern int cifs_atomic_open(struct inode *, struct dentry *,
struct file *, unsigned, umode_t);
@@ -57,12 +57,12 @@ extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
unsigned int);
extern int cifs_unlink(struct inode *dir, struct dentry *dentry);
extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *);
-extern int cifs_mknod(struct user_namespace *, struct inode *, struct dentry *,
+extern int cifs_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
umode_t, dev_t);
-extern int cifs_mkdir(struct user_namespace *, struct inode *, struct dentry *,
+extern int cifs_mkdir(struct mnt_idmap *, struct inode *, struct dentry *,
umode_t);
extern int cifs_rmdir(struct inode *, struct dentry *);
-extern int cifs_rename2(struct user_namespace *, struct inode *,
+extern int cifs_rename2(struct mnt_idmap *, struct inode *,
struct dentry *, struct inode *, struct dentry *,
unsigned int);
extern int cifs_revalidate_file_attr(struct file *filp);
@@ -72,9 +72,9 @@ extern int cifs_revalidate_dentry(struct dentry *);
extern int cifs_invalidate_mapping(struct inode *inode);
extern int cifs_revalidate_mapping(struct inode *inode);
extern int cifs_zap_mapping(struct inode *inode);
-extern int cifs_getattr(struct user_namespace *, const struct path *,
+extern int cifs_getattr(struct mnt_idmap *, const struct path *,
struct kstat *, u32, unsigned int);
-extern int cifs_setattr(struct user_namespace *, struct dentry *,
+extern int cifs_setattr(struct mnt_idmap *, struct dentry *,
struct iattr *);
extern int cifs_fiemap(struct inode *, struct fiemap_extent_info *, u64 start,
u64 len);
@@ -124,7 +124,7 @@ extern struct vfsmount *cifs_dfs_d_automount(struct path *path);
/* Functions related to symlinks */
extern const char *cifs_get_link(struct dentry *, struct inode *,
struct delayed_call *);
-extern int cifs_symlink(struct user_namespace *mnt_userns, struct inode *inode,
+extern int cifs_symlink(struct mnt_idmap *idmap, struct inode *inode,
struct dentry *direntry, const char *symname);
#ifdef CONFIG_CIFS_XATTR
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index cfdd5bf701a1..cd8171a1c9a0 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -26,6 +26,7 @@
#include <uapi/linux/cifs/cifs_mount.h>
#include "../smbfs_common/smb2pdu.h"
#include "smb2pdu.h"
+#include <linux/filelock.h>
#define SMB_PATH_MAX 260
#define CIFS_PORT 445
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 1207b39686fb..b8a47704a6ef 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -225,9 +225,9 @@ extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *,
const char *, u32 *, u32);
extern struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *,
const struct cifs_fid *, u32 *, u32);
-extern struct posix_acl *cifs_get_acl(struct user_namespace *mnt_userns,
+extern struct posix_acl *cifs_get_acl(struct mnt_idmap *idmap,
struct dentry *dentry, int type);
-extern int cifs_set_acl(struct user_namespace *mnt_userns,
+extern int cifs_set_acl(struct mnt_idmap *idmap,
struct dentry *dentry, struct posix_acl *acl, int type);
extern int set_cifs_acl(struct cifs_ntsd *, __u32, struct inode *,
const char *, int);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 23f10e0d6e7e..60dd4e37030a 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -15,6 +15,7 @@
/* want to reuse a stale file handle and only the caller knows the file info */
#include <linux/fs.h>
+#include <linux/filelock.h>
#include <linux/kernel.h>
#include <linux/vfs.h>
#include <linux/slab.h>
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index d371259d6808..b2a04b4e89a5 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2606,11 +2606,14 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx)
INIT_LIST_HEAD(&tcon->pending_opens);
tcon->status = TID_GOOD;
- /* schedule query interfaces poll */
INIT_DELAYED_WORK(&tcon->query_interfaces,
smb2_query_server_interfaces);
- queue_delayed_work(cifsiod_wq, &tcon->query_interfaces,
- (SMB_INTERFACE_POLL_INTERVAL * HZ));
+ if (ses->server->dialect >= SMB30_PROT_ID &&
+ (ses->server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) {
+ /* schedule query interfaces poll */
+ queue_delayed_work(cifsiod_wq, &tcon->query_interfaces,
+ (SMB_INTERFACE_POLL_INTERVAL * HZ));
+ }
spin_lock(&cifs_tcp_ses_lock);
list_add(&tcon->tcon_list, &ses->tcon_list);
diff --git a/fs/cifs/dfs.c b/fs/cifs/dfs.c
index b541e68378f6..b64d20374b9c 100644
--- a/fs/cifs/dfs.c
+++ b/fs/cifs/dfs.c
@@ -327,8 +327,8 @@ static int update_server_fullpath(struct TCP_Server_Info *server, struct cifs_sb
return rc;
}
-static int target_share_matches_server(struct TCP_Server_Info *server, const char *tcp_host,
- size_t tcp_host_len, char *share, bool *target_match)
+static int target_share_matches_server(struct TCP_Server_Info *server, char *share,
+ bool *target_match)
{
int rc = 0;
const char *dfs_host;
@@ -338,13 +338,16 @@ static int target_share_matches_server(struct TCP_Server_Info *server, const cha
extract_unc_hostname(share, &dfs_host, &dfs_host_len);
/* Check if hostnames or addresses match */
- if (dfs_host_len != tcp_host_len || strncasecmp(dfs_host, tcp_host, dfs_host_len) != 0) {
- cifs_dbg(FYI, "%s: %.*s doesn't match %.*s\n", __func__, (int)dfs_host_len,
- dfs_host, (int)tcp_host_len, tcp_host);
+ cifs_server_lock(server);
+ if (dfs_host_len != strlen(server->hostname) ||
+ strncasecmp(dfs_host, server->hostname, dfs_host_len)) {
+ cifs_dbg(FYI, "%s: %.*s doesn't match %s\n", __func__,
+ (int)dfs_host_len, dfs_host, server->hostname);
rc = match_target_ip(server, dfs_host, dfs_host_len, target_match);
if (rc)
cifs_dbg(VFS, "%s: failed to match target ip: %d\n", __func__, rc);
}
+ cifs_server_unlock(server);
return rc;
}
@@ -358,13 +361,9 @@ static int __tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *t
struct cifs_ses *root_ses = CIFS_DFS_ROOT_SES(tcon->ses);
struct cifs_tcon *ipc = root_ses->tcon_ipc;
char *share = NULL, *prefix = NULL;
- const char *tcp_host;
- size_t tcp_host_len;
struct dfs_cache_tgt_iterator *tit;
bool target_match;
- extract_unc_hostname(server->hostname, &tcp_host, &tcp_host_len);
-
tit = dfs_cache_get_tgt_iterator(tl);
if (!tit) {
rc = -ENOENT;
@@ -387,8 +386,7 @@ static int __tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *t
break;
}
- rc = target_share_matches_server(server, tcp_host, tcp_host_len, share,
- &target_match);
+ rc = target_share_matches_server(server, share, &target_match);
if (rc)
break;
if (!target_match) {
@@ -401,8 +399,7 @@ static int __tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *t
if (ipc->need_reconnect) {
scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname);
rc = ops->tree_connect(xid, ipc->ses, tree, ipc, cifs_sb->local_nls);
- if (rc)
- break;
+ cifs_dbg(FYI, "%s: reconnect ipc: %d\n", __func__, rc);
}
scnprintf(tree, MAX_TREE_SIZE, "\\%s", share);
@@ -498,7 +495,9 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru
}
if (tcon->ipc) {
+ cifs_server_lock(server);
scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname);
+ cifs_server_unlock(server);
rc = ops->tree_connect(xid, tcon->ses, tree, tcon, nlsc);
goto out;
}
diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index 43ad1176dcb9..ac86bd0ebd63 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -269,7 +269,7 @@ static int dfscache_proc_show(struct seq_file *m, void *v)
list_for_each_entry(t, &ce->tlist, list) {
seq_printf(m, " %s%s\n",
t->name,
- ce->tgthint == t ? " (target hint)" : "");
+ READ_ONCE(ce->tgthint) == t ? " (target hint)" : "");
}
}
}
@@ -321,7 +321,7 @@ static inline void dump_tgts(const struct cache_entry *ce)
cifs_dbg(FYI, "target list:\n");
list_for_each_entry(t, &ce->tlist, list) {
cifs_dbg(FYI, " %s%s\n", t->name,
- ce->tgthint == t ? " (target hint)" : "");
+ READ_ONCE(ce->tgthint) == t ? " (target hint)" : "");
}
}
@@ -427,7 +427,7 @@ static int cache_entry_hash(const void *data, int size, unsigned int *hash)
/* Return target hint of a DFS cache entry */
static inline char *get_tgt_name(const struct cache_entry *ce)
{
- struct cache_dfs_tgt *t = ce->tgthint;
+ struct cache_dfs_tgt *t = READ_ONCE(ce->tgthint);
return t ? t->name : ERR_PTR(-ENOENT);
}
@@ -470,6 +470,7 @@ static struct cache_dfs_tgt *alloc_target(const char *name, int path_consumed)
static int copy_ref_data(const struct dfs_info3_param *refs, int numrefs,
struct cache_entry *ce, const char *tgthint)
{
+ struct cache_dfs_tgt *target;
int i;
ce->ttl = max_t(int, refs[0].ttl, CACHE_MIN_TTL);
@@ -496,8 +497,9 @@ static int copy_ref_data(const struct dfs_info3_param *refs, int numrefs,
ce->numtgts++;
}
- ce->tgthint = list_first_entry_or_null(&ce->tlist,
- struct cache_dfs_tgt, list);
+ target = list_first_entry_or_null(&ce->tlist, struct cache_dfs_tgt,
+ list);
+ WRITE_ONCE(ce->tgthint, target);
return 0;
}
@@ -558,7 +560,8 @@ static void remove_oldest_entry_locked(void)
}
/* Add a new DFS cache entry */
-static int add_cache_entry_locked(struct dfs_info3_param *refs, int numrefs)
+static struct cache_entry *add_cache_entry_locked(struct dfs_info3_param *refs,
+ int numrefs)
{
int rc;
struct cache_entry *ce;
@@ -573,11 +576,11 @@ static int add_cache_entry_locked(struct dfs_info3_param *refs, int numrefs)
rc = cache_entry_hash(refs[0].path_name, strlen(refs[0].path_name), &hash);
if (rc)
- return rc;
+ return ERR_PTR(rc);
ce = alloc_cache_entry(refs, numrefs);
if (IS_ERR(ce))
- return PTR_ERR(ce);
+ return ce;
spin_lock(&cache_ttl_lock);
if (!cache_ttl) {
@@ -594,7 +597,7 @@ static int add_cache_entry_locked(struct dfs_info3_param *refs, int numrefs)
atomic_inc(&cache_count);
- return 0;
+ return ce;
}
/* Check if two DFS paths are equal. @s1 and @s2 are expected to be in @cache_cp's charset */
@@ -641,7 +644,9 @@ static struct cache_entry *__lookup_cache_entry(const char *path, unsigned int h
*
* Use whole path components in the match. Must be called with htable_rw_lock held.
*
+ * Return cached entry if successful.
* Return ERR_PTR(-ENOENT) if the entry is not found.
+ * Return error ptr otherwise.
*/
static struct cache_entry *lookup_cache_entry(const char *path)
{
@@ -711,14 +716,15 @@ void dfs_cache_destroy(void)
static int update_cache_entry_locked(struct cache_entry *ce, const struct dfs_info3_param *refs,
int numrefs)
{
+ struct cache_dfs_tgt *target;
+ char *th = NULL;
int rc;
- char *s, *th = NULL;
WARN_ON(!rwsem_is_locked(&htable_rw_lock));
- if (ce->tgthint) {
- s = ce->tgthint->name;
- th = kstrdup(s, GFP_ATOMIC);
+ target = READ_ONCE(ce->tgthint);
+ if (target) {
+ th = kstrdup(target->name, GFP_ATOMIC);
if (!th)
return -ENOMEM;
}
@@ -767,51 +773,75 @@ static int get_dfs_referral(const unsigned int xid, struct cifs_ses *ses, const
*
* For interlinks, cifs_mount() and expand_dfs_referral() are supposed to
* handle them properly.
+ *
+ * On success, return entry with acquired lock for reading, otherwise error ptr.
*/
-static int cache_refresh_path(const unsigned int xid, struct cifs_ses *ses, const char *path)
+static struct cache_entry *cache_refresh_path(const unsigned int xid,
+ struct cifs_ses *ses,
+ const char *path,
+ bool force_refresh)
{
- int rc;
- struct cache_entry *ce;
struct dfs_info3_param *refs = NULL;
+ struct cache_entry *ce;
int numrefs = 0;
- bool newent = false;
+ int rc;
cifs_dbg(FYI, "%s: search path: %s\n", __func__, path);
- down_write(&htable_rw_lock);
+ down_read(&htable_rw_lock);
ce = lookup_cache_entry(path);
if (!IS_ERR(ce)) {
- if (!cache_entry_expired(ce)) {
- dump_ce(ce);
- up_write(&htable_rw_lock);
- return 0;
- }
- } else {
- newent = true;
+ if (!force_refresh && !cache_entry_expired(ce))
+ return ce;
+ } else if (PTR_ERR(ce) != -ENOENT) {
+ up_read(&htable_rw_lock);
+ return ce;
}
/*
- * Either the entry was not found, or it is expired.
+ * Unlock shared access as we don't want to hold any locks while getting
+ * a new referral. The @ses used for performing the I/O could be
+ * reconnecting and it acquires @htable_rw_lock to look up the dfs cache
+ * in order to failover -- if necessary.
+ */
+ up_read(&htable_rw_lock);
+
+ /*
+ * Either the entry was not found, or it is expired, or it is a forced
+ * refresh.
* Request a new DFS referral in order to create or update a cache entry.
*/
rc = get_dfs_referral(xid, ses, path, &refs, &numrefs);
- if (rc)
- goto out_unlock;
+ if (rc) {
+ ce = ERR_PTR(rc);
+ goto out;
+ }
dump_refs(refs, numrefs);
- if (!newent) {
- rc = update_cache_entry_locked(ce, refs, numrefs);
- goto out_unlock;
+ down_write(&htable_rw_lock);
+ /* Re-check as another task might have it added or refreshed already */
+ ce = lookup_cache_entry(path);
+ if (!IS_ERR(ce)) {
+ if (force_refresh || cache_entry_expired(ce)) {
+ rc = update_cache_entry_locked(ce, refs, numrefs);
+ if (rc)
+ ce = ERR_PTR(rc);
+ }
+ } else if (PTR_ERR(ce) == -ENOENT) {
+ ce = add_cache_entry_locked(refs, numrefs);
}
- rc = add_cache_entry_locked(refs, numrefs);
+ if (IS_ERR(ce)) {
+ up_write(&htable_rw_lock);
+ goto out;
+ }
-out_unlock:
- up_write(&htable_rw_lock);
+ downgrade_write(&htable_rw_lock);
+out:
free_dfs_info_array(refs, numrefs);
- return rc;
+ return ce;
}
/*
@@ -878,7 +908,7 @@ static int get_targets(struct cache_entry *ce, struct dfs_cache_tgt_list *tl)
}
it->it_path_consumed = t->path_consumed;
- if (ce->tgthint == t)
+ if (READ_ONCE(ce->tgthint) == t)
list_add(&it->it_list, head);
else
list_add_tail(&it->it_list, head);
@@ -931,15 +961,8 @@ int dfs_cache_find(const unsigned int xid, struct cifs_ses *ses, const struct nl
if (IS_ERR(npath))
return PTR_ERR(npath);
- rc = cache_refresh_path(xid, ses, npath);
- if (rc)
- goto out_free_path;
-
- down_read(&htable_rw_lock);
-
- ce = lookup_cache_entry(npath);
+ ce = cache_refresh_path(xid, ses, npath, false);
if (IS_ERR(ce)) {
- up_read(&htable_rw_lock);
rc = PTR_ERR(ce);
goto out_free_path;
}
@@ -1003,72 +1026,6 @@ out_unlock:
}
/**
- * dfs_cache_update_tgthint - update target hint of a DFS cache entry
- *
- * If it doesn't find the cache entry, then it will get a DFS referral for @path
- * and create a new entry.
- *
- * In case the cache entry exists but expired, it will get a DFS referral
- * for @path and then update the respective cache entry.
- *
- * @xid: syscall id
- * @ses: smb session
- * @cp: codepage
- * @remap: type of character remapping for paths
- * @path: path to lookup in DFS referral cache
- * @it: DFS target iterator
- *
- * Return zero if the target hint was updated successfully, otherwise non-zero.
- */
-int dfs_cache_update_tgthint(const unsigned int xid, struct cifs_ses *ses,
- const struct nls_table *cp, int remap, const char *path,
- const struct dfs_cache_tgt_iterator *it)
-{
- int rc;
- const char *npath;
- struct cache_entry *ce;
- struct cache_dfs_tgt *t;
-
- npath = dfs_cache_canonical_path(path, cp, remap);
- if (IS_ERR(npath))
- return PTR_ERR(npath);
-
- cifs_dbg(FYI, "%s: update target hint - path: %s\n", __func__, npath);
-
- rc = cache_refresh_path(xid, ses, npath);
- if (rc)
- goto out_free_path;
-
- down_write(&htable_rw_lock);
-
- ce = lookup_cache_entry(npath);
- if (IS_ERR(ce)) {
- rc = PTR_ERR(ce);
- goto out_unlock;
- }
-
- t = ce->tgthint;
-
- if (likely(!strcasecmp(it->it_name, t->name)))
- goto out_unlock;
-
- list_for_each_entry(t, &ce->tlist, list) {
- if (!strcasecmp(t->name, it->it_name)) {
- ce->tgthint = t;
- cifs_dbg(FYI, "%s: new target hint: %s\n", __func__,
- it->it_name);
- break;
- }
- }
-
-out_unlock:
- up_write(&htable_rw_lock);
-out_free_path:
- kfree(npath);
- return rc;
-}
-
-/**
* dfs_cache_noreq_update_tgthint - update target hint of a DFS cache entry
* without sending any requests to the currently connected server.
*
@@ -1092,21 +1049,20 @@ void dfs_cache_noreq_update_tgthint(const char *path, const struct dfs_cache_tgt
cifs_dbg(FYI, "%s: path: %s\n", __func__, path);
- if (!down_write_trylock(&htable_rw_lock))
- return;
+ down_read(&htable_rw_lock);
ce = lookup_cache_entry(path);
if (IS_ERR(ce))
goto out_unlock;
- t = ce->tgthint;
+ t = READ_ONCE(ce->tgthint);
if (unlikely(!strcasecmp(it->it_name, t->name)))
goto out_unlock;
list_for_each_entry(t, &ce->tlist, list) {
if (!strcasecmp(t->name, it->it_name)) {
- ce->tgthint = t;
+ WRITE_ONCE(ce->tgthint, t);
cifs_dbg(FYI, "%s: new target hint: %s\n", __func__,
it->it_name);
break;
@@ -1114,7 +1070,7 @@ void dfs_cache_noreq_update_tgthint(const char *path, const struct dfs_cache_tgt
}
out_unlock:
- up_write(&htable_rw_lock);
+ up_read(&htable_rw_lock);
}
/**
@@ -1299,7 +1255,6 @@ static bool target_share_equal(struct TCP_Server_Info *server, const char *s1, c
* Resolve share's hostname and check if server address matches. Otherwise just ignore it
* as we could not have upcall to resolve hostname or failed to convert ip address.
*/
- match = true;
extract_unc_hostname(s1, &host, &hostlen);
scnprintf(unc, sizeof(unc), "\\\\%.*s", (int)hostlen, host);
@@ -1321,35 +1276,37 @@ static bool target_share_equal(struct TCP_Server_Info *server, const char *s1, c
* Mark dfs tcon for reconnecting when the currently connected tcon does not match any of the new
* target shares in @refs.
*/
-static void mark_for_reconnect_if_needed(struct cifs_tcon *tcon, struct dfs_cache_tgt_list *tl,
- const struct dfs_info3_param *refs, int numrefs)
+static void mark_for_reconnect_if_needed(struct TCP_Server_Info *server,
+ struct dfs_cache_tgt_list *old_tl,
+ struct dfs_cache_tgt_list *new_tl)
{
- struct dfs_cache_tgt_iterator *it;
- int i;
-
- for (it = dfs_cache_get_tgt_iterator(tl); it; it = dfs_cache_get_next_tgt(tl, it)) {
- for (i = 0; i < numrefs; i++) {
- if (target_share_equal(tcon->ses->server, dfs_cache_get_tgt_name(it),
- refs[i].node_name))
+ struct dfs_cache_tgt_iterator *oit, *nit;
+
+ for (oit = dfs_cache_get_tgt_iterator(old_tl); oit;
+ oit = dfs_cache_get_next_tgt(old_tl, oit)) {
+ for (nit = dfs_cache_get_tgt_iterator(new_tl); nit;
+ nit = dfs_cache_get_next_tgt(new_tl, nit)) {
+ if (target_share_equal(server,
+ dfs_cache_get_tgt_name(oit),
+ dfs_cache_get_tgt_name(nit)))
return;
}
}
cifs_dbg(FYI, "%s: no cached or matched targets. mark dfs share for reconnect.\n", __func__);
- cifs_signal_cifsd_for_reconnect(tcon->ses->server, true);
+ cifs_signal_cifsd_for_reconnect(server, true);
}
/* Refresh dfs referral of tcon and mark it for reconnect if needed */
static int __refresh_tcon(const char *path, struct cifs_tcon *tcon, bool force_refresh)
{
- struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl);
+ struct dfs_cache_tgt_list old_tl = DFS_CACHE_TGT_LIST_INIT(old_tl);
+ struct dfs_cache_tgt_list new_tl = DFS_CACHE_TGT_LIST_INIT(new_tl);
struct cifs_ses *ses = CIFS_DFS_ROOT_SES(tcon->ses);
struct cifs_tcon *ipc = ses->tcon_ipc;
- struct dfs_info3_param *refs = NULL;
bool needs_refresh = false;
struct cache_entry *ce;
unsigned int xid;
- int numrefs = 0;
int rc = 0;
xid = get_xid();
@@ -1358,9 +1315,8 @@ static int __refresh_tcon(const char *path, struct cifs_tcon *tcon, bool force_r
ce = lookup_cache_entry(path);
needs_refresh = force_refresh || IS_ERR(ce) || cache_entry_expired(ce);
if (!IS_ERR(ce)) {
- rc = get_targets(ce, &tl);
- if (rc)
- cifs_dbg(FYI, "%s: could not get dfs targets: %d\n", __func__, rc);
+ rc = get_targets(ce, &old_tl);
+ cifs_dbg(FYI, "%s: get_targets: %d\n", __func__, rc);
}
up_read(&htable_rw_lock);
@@ -1377,26 +1333,18 @@ static int __refresh_tcon(const char *path, struct cifs_tcon *tcon, bool force_r
}
spin_unlock(&ipc->tc_lock);
- rc = get_dfs_referral(xid, ses, path, &refs, &numrefs);
- if (!rc) {
- /* Create or update a cache entry with the new referral */
- dump_refs(refs, numrefs);
-
- down_write(&htable_rw_lock);
- ce = lookup_cache_entry(path);
- if (IS_ERR(ce))
- add_cache_entry_locked(refs, numrefs);
- else if (force_refresh || cache_entry_expired(ce))
- update_cache_entry_locked(ce, refs, numrefs);
- up_write(&htable_rw_lock);
-
- mark_for_reconnect_if_needed(tcon, &tl, refs, numrefs);
+ ce = cache_refresh_path(xid, ses, path, true);
+ if (!IS_ERR(ce)) {
+ rc = get_targets(ce, &new_tl);
+ up_read(&htable_rw_lock);
+ cifs_dbg(FYI, "%s: get_targets: %d\n", __func__, rc);
+ mark_for_reconnect_if_needed(tcon->ses->server, &old_tl, &new_tl);
}
out:
free_xid(xid);
- dfs_cache_free_tgts(&tl);
- free_dfs_info_array(refs, numrefs);
+ dfs_cache_free_tgts(&old_tl);
+ dfs_cache_free_tgts(&new_tl);
return rc;
}
diff --git a/fs/cifs/dfs_cache.h b/fs/cifs/dfs_cache.h
index f7cff0be9327..be3b5a44cf82 100644
--- a/fs/cifs/dfs_cache.h
+++ b/fs/cifs/dfs_cache.h
@@ -35,9 +35,6 @@ int dfs_cache_find(const unsigned int xid, struct cifs_ses *ses, const struct nl
struct dfs_cache_tgt_list *tgt_list);
int dfs_cache_noreq_find(const char *path, struct dfs_info3_param *ref,
struct dfs_cache_tgt_list *tgt_list);
-int dfs_cache_update_tgthint(const unsigned int xid, struct cifs_ses *ses,
- const struct nls_table *cp, int remap, const char *path,
- const struct dfs_cache_tgt_iterator *it);
void dfs_cache_noreq_update_tgthint(const char *path, const struct dfs_cache_tgt_iterator *it);
int dfs_cache_get_tgt_referral(const char *path, const struct dfs_cache_tgt_iterator *it,
struct dfs_info3_param *ref);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index ad4208bf1e32..2b6076324ffc 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -529,7 +529,7 @@ out_free_xid:
return rc;
}
-int cifs_create(struct user_namespace *mnt_userns, struct inode *inode,
+int cifs_create(struct mnt_idmap *idmap, struct inode *inode,
struct dentry *direntry, umode_t mode, bool excl)
{
int rc;
@@ -579,7 +579,7 @@ out_free_xid:
return rc;
}
-int cifs_mknod(struct user_namespace *mnt_userns, struct inode *inode,
+int cifs_mknod(struct mnt_idmap *idmap, struct inode *inode,
struct dentry *direntry, umode_t mode, dev_t device_number)
{
int rc = -EPERM;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 22dfc1f8b4f1..2870e3b6ffe8 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -9,6 +9,7 @@
*
*/
#include <linux/fs.h>
+#include <linux/filelock.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
@@ -3889,7 +3890,7 @@ uncached_fill_pages(struct TCP_Server_Info *server,
rdata->got_bytes += result;
}
- return rdata->got_bytes > 0 && result != -ECONNABORTED ?
+ return result != -ECONNABORTED && rdata->got_bytes > 0 ?
rdata->got_bytes : result;
}
@@ -4665,7 +4666,7 @@ readpages_fill_pages(struct TCP_Server_Info *server,
rdata->got_bytes += result;
}
- return rdata->got_bytes > 0 && result != -ECONNABORTED ?
+ return result != -ECONNABORTED && rdata->got_bytes > 0 ?
rdata->got_bytes : result;
}
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index f145a59af89b..11cdc7cfe0ba 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1910,7 +1910,7 @@ posix_mkdir_get_info:
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
-int cifs_mkdir(struct user_namespace *mnt_userns, struct inode *inode,
+int cifs_mkdir(struct mnt_idmap *idmap, struct inode *inode,
struct dentry *direntry, umode_t mode)
{
int rc = 0;
@@ -2138,7 +2138,7 @@ do_rename_exit:
}
int
-cifs_rename2(struct user_namespace *mnt_userns, struct inode *source_dir,
+cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir,
struct dentry *source_dentry, struct inode *target_dir,
struct dentry *target_dentry, unsigned int flags)
{
@@ -2496,7 +2496,7 @@ int cifs_revalidate_dentry(struct dentry *dentry)
return cifs_revalidate_mapping(inode);
}
-int cifs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+int cifs_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int flags)
{
struct dentry *dentry = path->dentry;
@@ -2537,7 +2537,7 @@ int cifs_getattr(struct user_namespace *mnt_userns, const struct path *path,
return rc;
}
- generic_fillattr(&init_user_ns, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, inode, stat);
stat->blksize = cifs_sb->ctx->bsize;
stat->ino = CIFS_I(inode)->uniqueid;
@@ -2752,7 +2752,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM)
attrs->ia_valid |= ATTR_FORCE;
- rc = setattr_prepare(&init_user_ns, direntry, attrs);
+ rc = setattr_prepare(&nop_mnt_idmap, direntry, attrs);
if (rc < 0)
goto out;
@@ -2859,7 +2859,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
fscache_resize_cookie(cifs_inode_cookie(inode), attrs->ia_size);
}
- setattr_copy(&init_user_ns, inode, attrs);
+ setattr_copy(&nop_mnt_idmap, inode, attrs);
mark_inode_dirty(inode);
/* force revalidate when any of these times are set since some
@@ -2903,7 +2903,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM)
attrs->ia_valid |= ATTR_FORCE;
- rc = setattr_prepare(&init_user_ns, direntry, attrs);
+ rc = setattr_prepare(&nop_mnt_idmap, direntry, attrs);
if (rc < 0)
goto cifs_setattr_exit;
@@ -3058,7 +3058,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
fscache_resize_cookie(cifs_inode_cookie(inode), attrs->ia_size);
}
- setattr_copy(&init_user_ns, inode, attrs);
+ setattr_copy(&nop_mnt_idmap, inode, attrs);
mark_inode_dirty(inode);
cifs_setattr_exit:
@@ -3068,7 +3068,7 @@ cifs_setattr_exit:
}
int
-cifs_setattr(struct user_namespace *mnt_userns, struct dentry *direntry,
+cifs_setattr(struct mnt_idmap *idmap, struct dentry *direntry,
struct iattr *attrs)
{
struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb);
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index bd374feeccaa..4510dea77be3 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -428,6 +428,7 @@ smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
oparms.disposition = FILE_CREATE;
oparms.fid = &fid;
oparms.reconnect = false;
+ oparms.mode = 0644;
rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL,
NULL, NULL);
@@ -568,7 +569,7 @@ cifs_hl_exit:
}
int
-cifs_symlink(struct user_namespace *mnt_userns, struct inode *inode,
+cifs_symlink(struct mnt_idmap *idmap, struct inode *inode,
struct dentry *direntry, const char *symname)
{
int rc = -EOPNOTSUPP;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 4d3c586785a5..2a19c7987c5b 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -1277,7 +1277,9 @@ int match_target_ip(struct TCP_Server_Info *server,
if (rc < 0)
return rc;
+ spin_lock(&server->srv_lock);
*result = cifs_match_ipaddr((struct sockaddr *)&server->dstaddr, (struct sockaddr *)&ss);
+ spin_unlock(&server->srv_lock);
cifs_dbg(FYI, "%s: ip addresses match: %u\n", __func__, *result);
return 0;
}
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 9e7d9f0baa18..c47b254f0d1e 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -292,9 +292,10 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
continue;
}
kref_get(&iface->refcount);
+ break;
}
- if (!list_entry_is_head(iface, &ses->iface_list, iface_head)) {
+ if (list_entry_is_head(iface, &ses->iface_list, iface_head)) {
rc = 1;
iface = NULL;
cifs_dbg(FYI, "unable to find a suitable iface\n");
@@ -814,6 +815,7 @@ int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
return -EINVAL;
}
if (tilen) {
+ kfree_sensitive(ses->auth_key.response);
ses->auth_key.response = kmemdup(bcc_ptr + tioffset, tilen,
GFP_KERNEL);
if (!ses->auth_key.response) {
@@ -1427,6 +1429,7 @@ sess_auth_kerberos(struct sess_data *sess_data)
goto out_put_spnego_key;
}
+ kfree_sensitive(ses->auth_key.response);
ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len,
GFP_KERNEL);
if (!ses->auth_key.response) {
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 50480751e521..4cb364454e13 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -562,17 +562,20 @@ static int cifs_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
if ((rc == -EOPNOTSUPP) || (rc == -EINVAL)) {
rc = SMBQueryInformation(xid, tcon, full_path, &fi, cifs_sb->local_nls,
cifs_remap(cifs_sb));
- if (!rc)
- move_cifs_info_to_smb2(&data->fi, &fi);
*adjustTZ = true;
}
- if (!rc && (le32_to_cpu(fi.Attributes) & ATTR_REPARSE)) {
+ if (!rc) {
int tmprc;
int oplock = 0;
struct cifs_fid fid;
struct cifs_open_parms oparms;
+ move_cifs_info_to_smb2(&data->fi, &fi);
+
+ if (!(le32_to_cpu(fi.Attributes) & ATTR_REPARSE))
+ return 0;
+
oparms.tcon = tcon;
oparms.cifs_sb = cifs_sb;
oparms.desired_access = FILE_READ_ATTRIBUTES;
@@ -716,17 +719,25 @@ cifs_mkdir_setinfo(struct inode *inode, const char *full_path,
static int cifs_open_file(const unsigned int xid, struct cifs_open_parms *oparms, __u32 *oplock,
void *buf)
{
- FILE_ALL_INFO *fi = buf;
+ struct cifs_open_info_data *data = buf;
+ FILE_ALL_INFO fi = {};
+ int rc;
if (!(oparms->tcon->ses->capabilities & CAP_NT_SMBS))
- return SMBLegacyOpen(xid, oparms->tcon, oparms->path,
- oparms->disposition,
- oparms->desired_access,
- oparms->create_options,
- &oparms->fid->netfid, oplock, fi,
- oparms->cifs_sb->local_nls,
- cifs_remap(oparms->cifs_sb));
- return CIFS_open(xid, oparms, oplock, fi);
+ rc = SMBLegacyOpen(xid, oparms->tcon, oparms->path,
+ oparms->disposition,
+ oparms->desired_access,
+ oparms->create_options,
+ &oparms->fid->netfid, oplock, &fi,
+ oparms->cifs_sb->local_nls,
+ cifs_remap(oparms->cifs_sb));
+ else
+ rc = CIFS_open(xid, oparms, oplock, &fi);
+
+ if (!rc && data)
+ move_cifs_info_to_smb2(&data->fi, &fi);
+
+ return rc;
}
static void
@@ -1050,7 +1061,7 @@ cifs_make_node(unsigned int xid, struct inode *inode,
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct inode *newinode = NULL;
int rc = -EPERM;
- FILE_ALL_INFO *buf = NULL;
+ struct cifs_open_info_data buf = {};
struct cifs_io_parms io_parms;
__u32 oplock = 0;
struct cifs_fid fid;
@@ -1082,14 +1093,14 @@ cifs_make_node(unsigned int xid, struct inode *inode,
cifs_sb->local_nls,
cifs_remap(cifs_sb));
if (rc)
- goto out;
+ return rc;
rc = cifs_get_inode_info_unix(&newinode, full_path,
inode->i_sb, xid);
if (rc == 0)
d_instantiate(dentry, newinode);
- goto out;
+ return rc;
}
/*
@@ -1097,19 +1108,13 @@ cifs_make_node(unsigned int xid, struct inode *inode,
* support block and char device (no socket & fifo)
*/
if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL))
- goto out;
+ return rc;
if (!S_ISCHR(mode) && !S_ISBLK(mode))
- goto out;
+ return rc;
cifs_dbg(FYI, "sfu compat create special file\n");
- buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
- if (buf == NULL) {
- rc = -ENOMEM;
- goto out;
- }
-
oparms.tcon = tcon;
oparms.cifs_sb = cifs_sb;
oparms.desired_access = GENERIC_WRITE;
@@ -1124,21 +1129,21 @@ cifs_make_node(unsigned int xid, struct inode *inode,
oplock = REQ_OPLOCK;
else
oplock = 0;
- rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, buf);
+ rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, &buf);
if (rc)
- goto out;
+ return rc;
/*
* BB Do not bother to decode buf since no local inode yet to put
* timestamps in, but we can reuse it safely.
*/
- pdev = (struct win_dev *)buf;
+ pdev = (struct win_dev *)&buf.fi;
io_parms.pid = current->tgid;
io_parms.tcon = tcon;
io_parms.offset = 0;
io_parms.length = sizeof(struct win_dev);
- iov[1].iov_base = buf;
+ iov[1].iov_base = &buf.fi;
iov[1].iov_len = sizeof(struct win_dev);
if (S_ISCHR(mode)) {
memcpy(pdev->type, "IntxCHR", 8);
@@ -1157,8 +1162,8 @@ cifs_make_node(unsigned int xid, struct inode *inode,
d_drop(dentry);
/* FIXME: add code here to set EAs */
-out:
- kfree(buf);
+
+ cifs_free_open_info(&buf);
return rc;
}
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index ba6cc50af390..9f1dd04b555a 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -7,6 +7,7 @@
*
*/
#include <linux/fs.h>
+#include <linux/filelock.h>
#include <linux/stat.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index dc160de7a6de..e6bcd2baf446 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -530,7 +530,6 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf,
p = buf;
spin_lock(&ses->iface_lock);
- ses->iface_count = 0;
/*
* Go through iface_list and do kref_put to remove
* any unused ifaces. ifaces in use will be removed
@@ -540,6 +539,7 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf,
iface_head) {
iface->is_active = 0;
kref_put(&iface->refcount, release_iface);
+ ses->iface_count--;
}
spin_unlock(&ses->iface_lock);
@@ -618,6 +618,7 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf,
/* just get a ref so that it doesn't get picked/freed */
iface->is_active = 1;
kref_get(&iface->refcount);
+ ses->iface_count++;
spin_unlock(&ses->iface_lock);
goto next_iface;
} else if (ret < 0) {
@@ -4488,17 +4489,12 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, int num_rqst,
/* copy pages form the old */
for (j = 0; j < npages; j++) {
- char *dst, *src;
unsigned int offset, len;
rqst_page_get_length(new, j, &len, &offset);
- dst = kmap_local_page(new->rq_pages[j]) + offset;
- src = kmap_local_page(old->rq_pages[j]) + offset;
-
- memcpy(dst, src, len);
- kunmap(new->rq_pages[j]);
- kunmap(old->rq_pages[j]);
+ memcpy_page(new->rq_pages[j], offset,
+ old->rq_pages[j], offset, len);
}
}
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index a5695748a89b..2c9ffa921e6f 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -541,9 +541,10 @@ static void
assemble_neg_contexts(struct smb2_negotiate_req *req,
struct TCP_Server_Info *server, unsigned int *total_len)
{
- char *pneg_ctxt;
- char *hostname = NULL;
unsigned int ctxt_len, neg_context_count;
+ struct TCP_Server_Info *pserver;
+ char *pneg_ctxt;
+ char *hostname;
if (*total_len > 200) {
/* In case length corrupted don't want to overrun smb buffer */
@@ -574,8 +575,9 @@ assemble_neg_contexts(struct smb2_negotiate_req *req,
* secondary channels don't have the hostname field populated
* use the hostname field in the primary channel instead
*/
- hostname = CIFS_SERVER_IS_CHAN(server) ?
- server->primary_server->hostname : server->hostname;
+ pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server;
+ cifs_server_lock(pserver);
+ hostname = pserver->hostname;
if (hostname && (hostname[0] != 0)) {
ctxt_len = build_netname_ctxt((struct smb2_netname_neg_context *)pneg_ctxt,
hostname);
@@ -584,6 +586,7 @@ assemble_neg_contexts(struct smb2_negotiate_req *req,
neg_context_count = 3;
} else
neg_context_count = 2;
+ cifs_server_unlock(pserver);
build_posix_ctxt((struct smb2_posix_neg_context *)pneg_ctxt);
*total_len += sizeof(struct smb2_posix_neg_context);
@@ -1450,6 +1453,7 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data)
/* keep session key if binding */
if (!is_binding) {
+ kfree_sensitive(ses->auth_key.response);
ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len,
GFP_KERNEL);
if (!ses->auth_key.response) {
@@ -1479,8 +1483,11 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data)
out_put_spnego_key:
key_invalidate(spnego_key);
key_put(spnego_key);
- if (rc)
+ if (rc) {
kfree_sensitive(ses->auth_key.response);
+ ses->auth_key.response = NULL;
+ ses->auth_key.len = 0;
+ }
out:
sess_data->result = rc;
sess_data->func = NULL;
@@ -4156,12 +4163,15 @@ smb2_readv_callback(struct mid_q_entry *mid)
(struct smb2_hdr *)rdata->iov[0].iov_base;
struct cifs_credits credits = { .value = 0, .instance = 0 };
struct smb_rqst rqst = { .rq_iov = &rdata->iov[1],
- .rq_nvec = 1,
- .rq_pages = rdata->pages,
- .rq_offset = rdata->page_offset,
- .rq_npages = rdata->nr_pages,
- .rq_pagesz = rdata->pagesz,
- .rq_tailsz = rdata->tailsz };
+ .rq_nvec = 1, };
+
+ if (rdata->got_bytes) {
+ rqst.rq_pages = rdata->pages;
+ rqst.rq_offset = rdata->page_offset;
+ rqst.rq_npages = rdata->nr_pages;
+ rqst.rq_pagesz = rdata->pagesz;
+ rqst.rq_tailsz = rdata->tailsz;
+ }
WARN_ONCE(rdata->server != mid->server,
"rdata server %p != mid server %p",
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index 90789aaa6567..8c816b25ce7c 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -1405,6 +1405,7 @@ void smbd_destroy(struct TCP_Server_Info *server)
destroy_workqueue(info->workqueue);
log_rdma_event(INFO, "rdma session destroyed\n");
kfree(info);
+ server->smbd_conn = NULL;
}
/*
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 5f2fb2fd2e37..50e762fa1a14 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -89,7 +89,7 @@ static int cifs_creation_time_set(unsigned int xid, struct cifs_tcon *pTcon,
}
static int cifs_xattr_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *dentry, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h
index 9be281bbcc06..dd6277d87afb 100644
--- a/fs/coda/coda_linux.h
+++ b/fs/coda/coda_linux.h
@@ -46,12 +46,12 @@ extern const struct file_operations coda_ioctl_operations;
/* operations shared over more than one file */
int coda_open(struct inode *i, struct file *f);
int coda_release(struct inode *i, struct file *f);
-int coda_permission(struct user_namespace *mnt_userns, struct inode *inode,
+int coda_permission(struct mnt_idmap *idmap, struct inode *inode,
int mask);
int coda_revalidate_inode(struct inode *);
-int coda_getattr(struct user_namespace *, const struct path *, struct kstat *,
+int coda_getattr(struct mnt_idmap *, const struct path *, struct kstat *,
u32, unsigned int);
-int coda_setattr(struct user_namespace *, struct dentry *, struct iattr *);
+int coda_setattr(struct mnt_idmap *, struct dentry *, struct iattr *);
/* this file: helpers */
char *coda_f2s(struct CodaFid *f);
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 328d7a684b63..8450b1bd354b 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -73,7 +73,7 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, unsig
}
-int coda_permission(struct user_namespace *mnt_userns, struct inode *inode,
+int coda_permission(struct mnt_idmap *idmap, struct inode *inode,
int mask)
{
int error;
@@ -133,7 +133,7 @@ static inline void coda_dir_drop_nlink(struct inode *dir)
}
/* creation routines: create, mknod, mkdir, link, symlink */
-static int coda_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int coda_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *de, umode_t mode, bool excl)
{
int error;
@@ -166,7 +166,7 @@ err_out:
return error;
}
-static int coda_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int coda_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *de, umode_t mode)
{
struct inode *inode;
@@ -228,7 +228,7 @@ static int coda_link(struct dentry *source_de, struct inode *dir_inode,
}
-static int coda_symlink(struct user_namespace *mnt_userns,
+static int coda_symlink(struct mnt_idmap *idmap,
struct inode *dir_inode, struct dentry *de,
const char *symname)
{
@@ -295,7 +295,7 @@ static int coda_rmdir(struct inode *dir, struct dentry *de)
}
/* rename */
-static int coda_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+static int coda_rename(struct mnt_idmap *idmap, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags)
{
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 2185328b65c7..d661e6cf17ac 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -251,16 +251,16 @@ static void coda_evict_inode(struct inode *inode)
coda_cache_clear_inode(inode);
}
-int coda_getattr(struct user_namespace *mnt_userns, const struct path *path,
+int coda_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int flags)
{
int err = coda_revalidate_inode(d_inode(path->dentry));
if (!err)
- generic_fillattr(&init_user_ns, d_inode(path->dentry), stat);
+ generic_fillattr(&nop_mnt_idmap, d_inode(path->dentry), stat);
return err;
}
-int coda_setattr(struct user_namespace *mnt_userns, struct dentry *de,
+int coda_setattr(struct mnt_idmap *idmap, struct dentry *de,
struct iattr *iattr)
{
struct inode *inode = d_inode(de);
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index cb9fd59a688c..36e35c15561a 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -24,7 +24,7 @@
#include "coda_linux.h"
/* pioctl ops */
-static int coda_ioctl_permission(struct user_namespace *mnt_userns,
+static int coda_ioctl_permission(struct mnt_idmap *idmap,
struct inode *inode, int mask);
static long coda_pioctl(struct file *filp, unsigned int cmd,
unsigned long user_data);
@@ -41,7 +41,7 @@ const struct file_operations coda_ioctl_operations = {
};
/* the coda pioctl inode ops */
-static int coda_ioctl_permission(struct user_namespace *mnt_userns,
+static int coda_ioctl_permission(struct mnt_idmap *idmap,
struct inode *inode, int mask)
{
return (mask & MAY_EXEC) ? -EACCES : 0;
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index c0395363eab9..e710a1782382 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -77,7 +77,7 @@ extern void configfs_hash_and_remove(struct dentry * dir, const char * name);
extern const unsigned char * configfs_get_name(struct configfs_dirent *sd);
extern void configfs_drop_dentry(struct configfs_dirent *sd, struct dentry *parent);
-extern int configfs_setattr(struct user_namespace *mnt_userns,
+extern int configfs_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *iattr);
extern struct dentry *configfs_pin_fs(void);
@@ -91,7 +91,7 @@ extern const struct inode_operations configfs_root_inode_operations;
extern const struct inode_operations configfs_symlink_inode_operations;
extern const struct dentry_operations configfs_dentry_ops;
-extern int configfs_symlink(struct user_namespace *mnt_userns,
+extern int configfs_symlink(struct mnt_idmap *idmap,
struct inode *dir, struct dentry *dentry,
const char *symname);
extern int configfs_unlink(struct inode *dir, struct dentry *dentry);
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index ec6519e1ca3b..4afcbbe63e68 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1251,7 +1251,7 @@ out_root_unlock:
}
EXPORT_SYMBOL(configfs_depend_item_unlocked);
-static int configfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int configfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
int ret = 0;
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index b601610e9907..1c15edbe70ff 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -32,7 +32,7 @@ static const struct inode_operations configfs_inode_operations ={
.setattr = configfs_setattr,
};
-int configfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int configfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *iattr)
{
struct inode * inode = d_inode(dentry);
@@ -60,7 +60,7 @@ int configfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
}
/* attributes were changed atleast once in past */
- error = simple_setattr(mnt_userns, dentry, iattr);
+ error = simple_setattr(idmap, dentry, iattr);
if (error)
return error;
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 0623c3edcfb9..69133ec1fac2 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -137,7 +137,7 @@ static int get_target(const char *symname, struct path *path,
}
-int configfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+int configfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *symname)
{
int ret;
@@ -196,7 +196,7 @@ int configfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
if (dentry->d_inode || d_unhashed(dentry))
ret = -EEXIST;
else
- ret = inode_permission(&init_user_ns, dir,
+ ret = inode_permission(&nop_mnt_idmap, dir,
MAY_WRITE | MAY_EXEC);
if (!ret)
ret = type->ct_item_ops->allow_link(parent_item, target_item);
diff --git a/fs/coredump.c b/fs/coredump.c
index de78bde2991b..68619329ec65 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -644,7 +644,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
goto close_fail;
}
} else {
- struct user_namespace *mnt_userns;
+ struct mnt_idmap *idmap;
struct inode *inode;
int open_flags = O_CREAT | O_RDWR | O_NOFOLLOW |
O_LARGEFILE | O_EXCL;
@@ -722,8 +722,8 @@ void do_coredump(const kernel_siginfo_t *siginfo)
* a process dumps core while its cwd is e.g. on a vfat
* filesystem.
*/
- mnt_userns = file_mnt_user_ns(cprm.file);
- if (!vfsuid_eq_kuid(i_uid_into_vfsuid(mnt_userns, inode),
+ idmap = file_mnt_idmap(cprm.file);
+ if (!vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode),
current_fsuid())) {
pr_info_ratelimited("Core dump to %s aborted: cannot preserve file owner\n",
cn.corename);
@@ -736,7 +736,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
}
if (!(cprm.file->f_mode & FMODE_CAN_WRITE))
goto close_fail;
- if (do_truncate(mnt_userns, cprm.file->f_path.dentry,
+ if (do_truncate(idmap, cprm.file->f_path.dentry,
0, 0, cprm.file))
goto close_fail;
}
@@ -838,6 +838,30 @@ static int __dump_skip(struct coredump_params *cprm, size_t nr)
}
}
+int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
+{
+ if (cprm->to_skip) {
+ if (!__dump_skip(cprm, cprm->to_skip))
+ return 0;
+ cprm->to_skip = 0;
+ }
+ return __dump_emit(cprm, addr, nr);
+}
+EXPORT_SYMBOL(dump_emit);
+
+void dump_skip_to(struct coredump_params *cprm, unsigned long pos)
+{
+ cprm->to_skip = pos - cprm->pos;
+}
+EXPORT_SYMBOL(dump_skip_to);
+
+void dump_skip(struct coredump_params *cprm, size_t nr)
+{
+ cprm->to_skip += nr;
+}
+EXPORT_SYMBOL(dump_skip);
+
+#ifdef CONFIG_ELF_CORE
static int dump_emit_page(struct coredump_params *cprm, struct page *page)
{
struct bio_vec bvec = {
@@ -871,30 +895,6 @@ static int dump_emit_page(struct coredump_params *cprm, struct page *page)
return 1;
}
-int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
-{
- if (cprm->to_skip) {
- if (!__dump_skip(cprm, cprm->to_skip))
- return 0;
- cprm->to_skip = 0;
- }
- return __dump_emit(cprm, addr, nr);
-}
-EXPORT_SYMBOL(dump_emit);
-
-void dump_skip_to(struct coredump_params *cprm, unsigned long pos)
-{
- cprm->to_skip = pos - cprm->pos;
-}
-EXPORT_SYMBOL(dump_skip_to);
-
-void dump_skip(struct coredump_params *cprm, size_t nr)
-{
- cprm->to_skip += nr;
-}
-EXPORT_SYMBOL(dump_skip);
-
-#ifdef CONFIG_ELF_CORE
int dump_user_range(struct coredump_params *cprm, unsigned long start,
unsigned long len)
{
diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c
index 1b4403136d05..d57d0a020f71 100644
--- a/fs/crypto/bio.c
+++ b/fs/crypto/bio.c
@@ -30,13 +30,11 @@
*/
bool fscrypt_decrypt_bio(struct bio *bio)
{
- struct bio_vec *bv;
- struct bvec_iter_all iter_all;
+ struct folio_iter fi;
- bio_for_each_segment_all(bv, bio, iter_all) {
- struct page *page = bv->bv_page;
- int err = fscrypt_decrypt_pagecache_blocks(page, bv->bv_len,
- bv->bv_offset);
+ bio_for_each_folio_all(fi, bio) {
+ int err = fscrypt_decrypt_pagecache_blocks(fi.folio, fi.length,
+ fi.offset);
if (err) {
bio->bi_status = errno_to_blk_status(err);
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index e78be66bbf01..bf642479269a 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -237,41 +237,43 @@ EXPORT_SYMBOL(fscrypt_encrypt_block_inplace);
/**
* fscrypt_decrypt_pagecache_blocks() - Decrypt filesystem blocks in a
- * pagecache page
- * @page: The locked pagecache page containing the block(s) to decrypt
+ * pagecache folio
+ * @folio: The locked pagecache folio containing the block(s) to decrypt
* @len: Total size of the block(s) to decrypt. Must be a nonzero
* multiple of the filesystem's block size.
- * @offs: Byte offset within @page of the first block to decrypt. Must be
+ * @offs: Byte offset within @folio of the first block to decrypt. Must be
* a multiple of the filesystem's block size.
*
- * The specified block(s) are decrypted in-place within the pagecache page,
- * which must still be locked and not uptodate. Normally, blocksize ==
- * PAGE_SIZE and the whole page is decrypted at once.
+ * The specified block(s) are decrypted in-place within the pagecache folio,
+ * which must still be locked and not uptodate.
*
* This is for use by the filesystem's ->readahead() method.
*
* Return: 0 on success; -errno on failure
*/
-int fscrypt_decrypt_pagecache_blocks(struct page *page, unsigned int len,
- unsigned int offs)
+int fscrypt_decrypt_pagecache_blocks(struct folio *folio, size_t len,
+ size_t offs)
{
- const struct inode *inode = page->mapping->host;
+ const struct inode *inode = folio->mapping->host;
const unsigned int blockbits = inode->i_blkbits;
const unsigned int blocksize = 1 << blockbits;
- u64 lblk_num = ((u64)page->index << (PAGE_SHIFT - blockbits)) +
+ u64 lblk_num = ((u64)folio->index << (PAGE_SHIFT - blockbits)) +
(offs >> blockbits);
- unsigned int i;
+ size_t i;
int err;
- if (WARN_ON_ONCE(!PageLocked(page)))
+ if (WARN_ON_ONCE(!folio_test_locked(folio)))
return -EINVAL;
if (WARN_ON_ONCE(len <= 0 || !IS_ALIGNED(len | offs, blocksize)))
return -EINVAL;
for (i = offs; i < offs + len; i += blocksize, lblk_num++) {
+ struct page *page = folio_page(folio, i >> PAGE_SHIFT);
+
err = fscrypt_crypt_block(inode, FS_DECRYPT, lblk_num, page,
- page, blocksize, i, GFP_NOFS);
+ page, blocksize, i & ~PAGE_MASK,
+ GFP_NOFS);
if (err)
return err;
}
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index 316a778cec0f..0fec2dfc36eb 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -573,6 +573,9 @@ fscrypt_find_master_key(struct super_block *sb,
int fscrypt_get_test_dummy_key_identifier(
u8 key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]);
+int fscrypt_add_test_dummy_key(struct super_block *sb,
+ struct fscrypt_key_specifier *key_spec);
+
int fscrypt_verify_key_added(struct super_block *sb,
const u8 identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]);
@@ -651,6 +654,7 @@ bool fscrypt_policies_equal(const union fscrypt_policy *policy1,
const union fscrypt_policy *policy2);
int fscrypt_policy_to_key_spec(const union fscrypt_policy *policy,
struct fscrypt_key_specifier *key_spec);
+const union fscrypt_policy *fscrypt_get_dummy_policy(struct super_block *sb);
bool fscrypt_supported_policy(const union fscrypt_policy *policy_u,
const struct inode *inode);
int fscrypt_policy_from_context(union fscrypt_policy *policy_u,
diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c
index 78dd2ff306bd..78086f8dbda5 100644
--- a/fs/crypto/keyring.c
+++ b/fs/crypto/keyring.c
@@ -211,10 +211,6 @@ static int allocate_filesystem_keyring(struct super_block *sb)
* are still available at this time; this is important because after user file
* accesses have been allowed, this function may need to evict keys from the
* keyslots of an inline crypto engine, which requires the block device(s).
- *
- * This is also called when the super_block is being freed. This is needed to
- * avoid a memory leak if mounting fails after the "test_dummy_encryption"
- * option was processed, as in that case the unmount-time call isn't made.
*/
void fscrypt_destroy_keyring(struct super_block *sb)
{
@@ -778,34 +774,26 @@ out:
/**
* fscrypt_add_test_dummy_key() - add the test dummy encryption key
* @sb: the filesystem instance to add the key to
- * @dummy_policy: the encryption policy for test_dummy_encryption
+ * @key_spec: the key specifier of the test dummy encryption key
*
- * If needed, add the key for the test_dummy_encryption mount option to the
- * filesystem. To prevent misuse of this mount option, a per-boot random key is
- * used instead of a hardcoded one. This makes it so that any encrypted files
- * created using this option won't be accessible after a reboot.
+ * Add the key for the test_dummy_encryption mount option to the filesystem. To
+ * prevent misuse of this mount option, a per-boot random key is used instead of
+ * a hardcoded one. This makes it so that any encrypted files created using
+ * this option won't be accessible after a reboot.
*
* Return: 0 on success, -errno on failure
*/
int fscrypt_add_test_dummy_key(struct super_block *sb,
- const struct fscrypt_dummy_policy *dummy_policy)
+ struct fscrypt_key_specifier *key_spec)
{
- const union fscrypt_policy *policy = dummy_policy->policy;
- struct fscrypt_key_specifier key_spec;
struct fscrypt_master_key_secret secret;
int err;
- if (!policy)
- return 0;
- err = fscrypt_policy_to_key_spec(policy, &key_spec);
- if (err)
- return err;
fscrypt_get_test_dummy_secret(&secret);
- err = add_master_key(sb, &secret, &key_spec);
+ err = add_master_key(sb, &secret, key_spec);
wipe_master_key_secret(&secret);
return err;
}
-EXPORT_SYMBOL_GPL(fscrypt_add_test_dummy_key);
/*
* Verify that the current user has added a master key with the given identifier
diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c
index 94757ccd3056..aa94fba9d17e 100644
--- a/fs/crypto/keysetup.c
+++ b/fs/crypto/keysetup.c
@@ -438,6 +438,7 @@ static int setup_file_encryption_key(struct fscrypt_info *ci,
bool need_dirhash_key,
struct fscrypt_master_key **mk_ret)
{
+ struct super_block *sb = ci->ci_inode->i_sb;
struct fscrypt_key_specifier mk_spec;
struct fscrypt_master_key *mk;
int err;
@@ -450,8 +451,26 @@ static int setup_file_encryption_key(struct fscrypt_info *ci,
if (err)
return err;
- mk = fscrypt_find_master_key(ci->ci_inode->i_sb, &mk_spec);
- if (!mk) {
+ mk = fscrypt_find_master_key(sb, &mk_spec);
+ if (unlikely(!mk)) {
+ const union fscrypt_policy *dummy_policy =
+ fscrypt_get_dummy_policy(sb);
+
+ /*
+ * Add the test_dummy_encryption key on-demand. In principle,
+ * it should be added at mount time. Do it here instead so that
+ * the individual filesystems don't need to worry about adding
+ * this key at mount time and cleaning up on mount failure.
+ */
+ if (dummy_policy &&
+ fscrypt_policies_equal(dummy_policy, &ci->ci_policy)) {
+ err = fscrypt_add_test_dummy_key(sb, &mk_spec);
+ if (err)
+ return err;
+ mk = fscrypt_find_master_key(sb, &mk_spec);
+ }
+ }
+ if (unlikely(!mk)) {
if (ci->ci_policy.version != FSCRYPT_POLICY_V1)
return -ENOKEY;
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index 893661b52376..3b5fcb6402ea 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -53,8 +53,7 @@ int fscrypt_policy_to_key_spec(const union fscrypt_policy *policy,
}
}
-static const union fscrypt_policy *
-fscrypt_get_dummy_policy(struct super_block *sb)
+const union fscrypt_policy *fscrypt_get_dummy_policy(struct super_block *sb)
{
if (!sb->s_cop->get_dummy_policy)
return NULL;
@@ -506,7 +505,7 @@ int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg)
return -EFAULT;
policy.version = version;
- if (!inode_owner_or_capable(&init_user_ns, inode))
+ if (!inode_owner_or_capable(&nop_mnt_idmap, inode))
return -EACCES;
ret = mnt_want_write_file(filp);
diff --git a/fs/dax.c b/fs/dax.c
index c48a3a93ab29..3e457a16c7d1 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1271,8 +1271,9 @@ static s64 dax_unshare_iter(struct iomap_iter *iter)
if (ret < 0)
goto out_unlock;
- ret = copy_mc_to_kernel(daddr, saddr, length);
- if (ret)
+ if (copy_mc_to_kernel(daddr, saddr, length) == 0)
+ ret = length;
+ else
ret = -EIO;
out_unlock:
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 2e8e112b1993..bf397f6a6a33 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -42,7 +42,7 @@ static unsigned int debugfs_allow __ro_after_init = DEFAULT_DEBUGFS_ALLOW_BITS;
* so that we can use the file mode as part of a heuristic to determine whether
* to lock down individual files.
*/
-static int debugfs_setattr(struct user_namespace *mnt_userns,
+static int debugfs_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *ia)
{
int ret;
@@ -52,7 +52,7 @@ static int debugfs_setattr(struct user_namespace *mnt_userns,
if (ret)
return ret;
}
- return simple_setattr(&init_user_ns, dentry, ia);
+ return simple_setattr(&nop_mnt_idmap, dentry, ia);
}
static const struct inode_operations debugfs_file_inode_operations = {
@@ -837,7 +837,7 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
take_dentry_name_snapshot(&old_name, old_dentry);
- error = simple_rename(&init_user_ns, d_inode(old_dir), old_dentry,
+ error = simple_rename(&nop_mnt_idmap, d_inode(old_dir), old_dentry,
d_inode(new_dir), dentry, 0);
if (error) {
release_dentry_name_snapshot(&old_name);
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index 737f185aad8d..ed4357e62f35 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -4,6 +4,7 @@
*/
#include <linux/fs.h>
+#include <linux/filelock.h>
#include <linux/miscdevice.h>
#include <linux/poll.h>
#include <linux/dlm.h>
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index e3f5d7f3c8a0..bd3f3c755b24 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1105,7 +1105,7 @@ ecryptfs_write_metadata_to_xattr(struct dentry *ecryptfs_dentry,
}
inode_lock(lower_inode);
- rc = __vfs_setxattr(&init_user_ns, lower_dentry, lower_inode,
+ rc = __vfs_setxattr(&nop_mnt_idmap, lower_dentry, lower_inode,
ECRYPTFS_XATTR_NAME, page_virt, size, 0);
if (!rc && ecryptfs_inode)
fsstack_copy_attr_all(ecryptfs_inode, lower_inode);
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index f3cd00fac9c3..144ace9e0dd9 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -139,7 +139,7 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry,
if (d_unhashed(lower_dentry))
rc = -EINVAL;
else
- rc = vfs_unlink(&init_user_ns, lower_dir, lower_dentry,
+ rc = vfs_unlink(&nop_mnt_idmap, lower_dir, lower_dentry,
NULL);
}
if (rc) {
@@ -180,7 +180,7 @@ ecryptfs_do_create(struct inode *directory_inode,
rc = lock_parent(ecryptfs_dentry, &lower_dentry, &lower_dir);
if (!rc)
- rc = vfs_create(&init_user_ns, lower_dir,
+ rc = vfs_create(&nop_mnt_idmap, lower_dir,
lower_dentry, mode, true);
if (rc) {
printk(KERN_ERR "%s: Failure to create dentry in lower fs; "
@@ -191,7 +191,7 @@ ecryptfs_do_create(struct inode *directory_inode,
inode = __ecryptfs_get_inode(d_inode(lower_dentry),
directory_inode->i_sb);
if (IS_ERR(inode)) {
- vfs_unlink(&init_user_ns, lower_dir, lower_dentry, NULL);
+ vfs_unlink(&nop_mnt_idmap, lower_dir, lower_dentry, NULL);
goto out_lock;
}
fsstack_copy_attr_times(directory_inode, lower_dir);
@@ -253,7 +253,7 @@ out:
* Returns zero on success; non-zero on error condition
*/
static int
-ecryptfs_create(struct user_namespace *mnt_userns,
+ecryptfs_create(struct mnt_idmap *idmap,
struct inode *directory_inode, struct dentry *ecryptfs_dentry,
umode_t mode, bool excl)
{
@@ -434,7 +434,7 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir,
lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry);
rc = lock_parent(new_dentry, &lower_new_dentry, &lower_dir);
if (!rc)
- rc = vfs_link(lower_old_dentry, &init_user_ns, lower_dir,
+ rc = vfs_link(lower_old_dentry, &nop_mnt_idmap, lower_dir,
lower_new_dentry, NULL);
if (rc || d_really_is_negative(lower_new_dentry))
goto out_lock;
@@ -456,7 +456,7 @@ static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry)
return ecryptfs_do_unlink(dir, dentry, d_inode(dentry));
}
-static int ecryptfs_symlink(struct user_namespace *mnt_userns,
+static int ecryptfs_symlink(struct mnt_idmap *idmap,
struct inode *dir, struct dentry *dentry,
const char *symname)
{
@@ -478,7 +478,7 @@ static int ecryptfs_symlink(struct user_namespace *mnt_userns,
strlen(symname));
if (rc)
goto out_lock;
- rc = vfs_symlink(&init_user_ns, lower_dir, lower_dentry,
+ rc = vfs_symlink(&nop_mnt_idmap, lower_dir, lower_dentry,
encoded_symname);
kfree(encoded_symname);
if (rc || d_really_is_negative(lower_dentry))
@@ -495,7 +495,7 @@ out_lock:
return rc;
}
-static int ecryptfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int ecryptfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
int rc;
@@ -504,7 +504,7 @@ static int ecryptfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
rc = lock_parent(dentry, &lower_dentry, &lower_dir);
if (!rc)
- rc = vfs_mkdir(&init_user_ns, lower_dir,
+ rc = vfs_mkdir(&nop_mnt_idmap, lower_dir,
lower_dentry, mode);
if (rc || d_really_is_negative(lower_dentry))
goto out;
@@ -533,7 +533,7 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
if (d_unhashed(lower_dentry))
rc = -EINVAL;
else
- rc = vfs_rmdir(&init_user_ns, lower_dir, lower_dentry);
+ rc = vfs_rmdir(&nop_mnt_idmap, lower_dir, lower_dentry);
}
if (!rc) {
clear_nlink(d_inode(dentry));
@@ -548,7 +548,7 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
}
static int
-ecryptfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+ecryptfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t dev)
{
int rc;
@@ -557,7 +557,7 @@ ecryptfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
rc = lock_parent(dentry, &lower_dentry, &lower_dir);
if (!rc)
- rc = vfs_mknod(&init_user_ns, lower_dir,
+ rc = vfs_mknod(&nop_mnt_idmap, lower_dir,
lower_dentry, mode, dev);
if (rc || d_really_is_negative(lower_dentry))
goto out;
@@ -574,7 +574,7 @@ out:
}
static int
-ecryptfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+ecryptfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags)
{
@@ -616,10 +616,10 @@ ecryptfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
goto out_lock;
}
- rd.old_mnt_userns = &init_user_ns;
+ rd.old_mnt_idmap = &nop_mnt_idmap;
rd.old_dir = d_inode(lower_old_dir_dentry);
rd.old_dentry = lower_old_dentry;
- rd.new_mnt_userns = &init_user_ns;
+ rd.new_mnt_idmap = &nop_mnt_idmap;
rd.new_dir = d_inode(lower_new_dir_dentry);
rd.new_dentry = lower_new_dentry;
rc = vfs_rename(&rd);
@@ -856,7 +856,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
inode_lock(d_inode(lower_dentry));
- rc = notify_change(&init_user_ns, lower_dentry,
+ rc = notify_change(&nop_mnt_idmap, lower_dentry,
&lower_ia, NULL);
inode_unlock(d_inode(lower_dentry));
}
@@ -864,16 +864,16 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
}
static int
-ecryptfs_permission(struct user_namespace *mnt_userns, struct inode *inode,
+ecryptfs_permission(struct mnt_idmap *idmap, struct inode *inode,
int mask)
{
- return inode_permission(&init_user_ns,
+ return inode_permission(&nop_mnt_idmap,
ecryptfs_inode_to_lower(inode), mask);
}
/**
* ecryptfs_setattr
- * @mnt_userns: user namespace of the target mount
+ * @idmap: idmap of the target mount
* @dentry: dentry handle to the inode to modify
* @ia: Structure with flags of what to change and values
*
@@ -884,7 +884,7 @@ ecryptfs_permission(struct user_namespace *mnt_userns, struct inode *inode,
* All other metadata changes will be passed right to the lower filesystem,
* and we will just update our inode to look like the lower.
*/
-static int ecryptfs_setattr(struct user_namespace *mnt_userns,
+static int ecryptfs_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *ia)
{
int rc = 0;
@@ -939,7 +939,7 @@ static int ecryptfs_setattr(struct user_namespace *mnt_userns,
}
mutex_unlock(&crypt_stat->cs_mutex);
- rc = setattr_prepare(&init_user_ns, dentry, ia);
+ rc = setattr_prepare(&nop_mnt_idmap, dentry, ia);
if (rc)
goto out;
if (ia->ia_valid & ATTR_SIZE) {
@@ -965,14 +965,14 @@ static int ecryptfs_setattr(struct user_namespace *mnt_userns,
lower_ia.ia_valid &= ~ATTR_MODE;
inode_lock(d_inode(lower_dentry));
- rc = notify_change(&init_user_ns, lower_dentry, &lower_ia, NULL);
+ rc = notify_change(&nop_mnt_idmap, lower_dentry, &lower_ia, NULL);
inode_unlock(d_inode(lower_dentry));
out:
fsstack_copy_attr_all(inode, lower_inode);
return rc;
}
-static int ecryptfs_getattr_link(struct user_namespace *mnt_userns,
+static int ecryptfs_getattr_link(struct mnt_idmap *idmap,
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags)
{
@@ -982,7 +982,7 @@ static int ecryptfs_getattr_link(struct user_namespace *mnt_userns,
mount_crypt_stat = &ecryptfs_superblock_to_private(
dentry->d_sb)->mount_crypt_stat;
- generic_fillattr(&init_user_ns, d_inode(dentry), stat);
+ generic_fillattr(&nop_mnt_idmap, d_inode(dentry), stat);
if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) {
char *target;
size_t targetsiz;
@@ -998,7 +998,7 @@ static int ecryptfs_getattr_link(struct user_namespace *mnt_userns,
return rc;
}
-static int ecryptfs_getattr(struct user_namespace *mnt_userns,
+static int ecryptfs_getattr(struct mnt_idmap *idmap,
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags)
{
@@ -1011,7 +1011,7 @@ static int ecryptfs_getattr(struct user_namespace *mnt_userns,
if (!rc) {
fsstack_copy_attr_all(d_inode(dentry),
ecryptfs_inode_to_lower(d_inode(dentry)));
- generic_fillattr(&init_user_ns, d_inode(dentry), stat);
+ generic_fillattr(&nop_mnt_idmap, d_inode(dentry), stat);
stat->blocks = lower_stat.blocks;
}
return rc;
@@ -1033,7 +1033,7 @@ ecryptfs_setxattr(struct dentry *dentry, struct inode *inode,
goto out;
}
inode_lock(lower_inode);
- rc = __vfs_setxattr_locked(&init_user_ns, lower_dentry, name, value, size, flags, NULL);
+ rc = __vfs_setxattr_locked(&nop_mnt_idmap, lower_dentry, name, value, size, flags, NULL);
inode_unlock(lower_inode);
if (!rc && inode)
fsstack_copy_attr_all(inode, lower_inode);
@@ -1099,7 +1099,7 @@ static int ecryptfs_removexattr(struct dentry *dentry, struct inode *inode,
goto out;
}
inode_lock(lower_inode);
- rc = __vfs_removexattr(&init_user_ns, lower_dentry, name);
+ rc = __vfs_removexattr(&nop_mnt_idmap, lower_dentry, name);
inode_unlock(lower_inode);
out:
return rc;
@@ -1110,26 +1110,26 @@ static int ecryptfs_fileattr_get(struct dentry *dentry, struct fileattr *fa)
return vfs_fileattr_get(ecryptfs_dentry_to_lower(dentry), fa);
}
-static int ecryptfs_fileattr_set(struct user_namespace *mnt_userns,
+static int ecryptfs_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa)
{
struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
int rc;
- rc = vfs_fileattr_set(&init_user_ns, lower_dentry, fa);
+ rc = vfs_fileattr_set(&nop_mnt_idmap, lower_dentry, fa);
fsstack_copy_attr_all(d_inode(dentry), d_inode(lower_dentry));
return rc;
}
-static struct posix_acl *ecryptfs_get_acl(struct user_namespace *mnt_userns,
+static struct posix_acl *ecryptfs_get_acl(struct mnt_idmap *idmap,
struct dentry *dentry, int type)
{
- return vfs_get_acl(mnt_userns, ecryptfs_dentry_to_lower(dentry),
+ return vfs_get_acl(idmap, ecryptfs_dentry_to_lower(dentry),
posix_acl_xattr_name(type));
}
-static int ecryptfs_set_acl(struct user_namespace *mnt_userns,
+static int ecryptfs_set_acl(struct mnt_idmap *idmap,
struct dentry *dentry, struct posix_acl *acl,
int type)
{
@@ -1137,7 +1137,7 @@ static int ecryptfs_set_acl(struct user_namespace *mnt_userns,
struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
struct inode *lower_inode = d_inode(lower_dentry);
- rc = vfs_set_acl(&init_user_ns, lower_dentry,
+ rc = vfs_set_acl(&nop_mnt_idmap, lower_dentry,
posix_acl_xattr_name(type), acl);
if (!rc)
fsstack_copy_attr_all(d_inode(dentry), lower_inode);
@@ -1190,7 +1190,7 @@ static int ecryptfs_xattr_get(const struct xattr_handler *handler,
}
static int ecryptfs_xattr_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *dentry, struct inode *inode,
const char *name, const void *value, size_t size,
int flags)
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 19af229eb7ca..373c3e5747e6 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -428,7 +428,7 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
if (size < 0)
size = 8;
put_unaligned_be64(i_size_read(ecryptfs_inode), xattr_virt);
- rc = __vfs_setxattr(&init_user_ns, lower_dentry, lower_inode,
+ rc = __vfs_setxattr(&nop_mnt_idmap, lower_dentry, lower_inode,
ECRYPTFS_XATTR_NAME, xattr_virt, size, 0);
inode_unlock(lower_inode);
if (rc)
diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c
index 617f3ad2485e..b973a2c03dde 100644
--- a/fs/efivarfs/inode.c
+++ b/fs/efivarfs/inode.c
@@ -70,7 +70,7 @@ bool efivarfs_valid_name(const char *str, int len)
return uuid_is_valid(s);
}
-static int efivarfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int efivarfs_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
struct inode *inode = NULL;
@@ -163,7 +163,7 @@ efivarfs_fileattr_get(struct dentry *dentry, struct fileattr *fa)
}
static int
-efivarfs_fileattr_set(struct user_namespace *mnt_userns,
+efivarfs_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa)
{
unsigned int i_flags = 0;
diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig
index 85490370e0ca..704fb59577e0 100644
--- a/fs/erofs/Kconfig
+++ b/fs/erofs/Kconfig
@@ -108,3 +108,21 @@ config EROFS_FS_ONDEMAND
read support.
If unsure, say N.
+
+config EROFS_FS_PCPU_KTHREAD
+ bool "EROFS per-cpu decompression kthread workers"
+ depends on EROFS_FS_ZIP
+ help
+ Saying Y here enables per-CPU kthread workers pool to carry out
+ async decompression for low latencies on some architectures.
+
+ If unsure, say N.
+
+config EROFS_FS_PCPU_KTHREAD_HIPRI
+ bool "EROFS high priority per-CPU kthread workers"
+ depends on EROFS_FS_ZIP && EROFS_FS_PCPU_KTHREAD
+ help
+ This permits EROFS to configure per-CPU kthread workers to run
+ at higher priority.
+
+ If unsure, say N.
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index f57f921683d7..032e12dccb84 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -74,8 +74,7 @@ void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
}
static int erofs_map_blocks_flatmode(struct inode *inode,
- struct erofs_map_blocks *map,
- int flags)
+ struct erofs_map_blocks *map)
{
erofs_blk_t nblocks, lastblk;
u64 offset = map->m_la;
@@ -91,11 +90,8 @@ static int erofs_map_blocks_flatmode(struct inode *inode,
map->m_pa = blknr_to_addr(vi->raw_blkaddr) + map->m_la;
map->m_plen = blknr_to_addr(lastblk) - offset;
} else if (tailendpacking) {
- /* 2 - inode inline B: inode, [xattrs], inline last blk... */
- struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
-
- map->m_pa = iloc(sbi, vi->nid) + vi->inode_isize +
- vi->xattr_isize + erofs_blkoff(map->m_la);
+ map->m_pa = erofs_iloc(inode) + vi->inode_isize +
+ vi->xattr_isize + erofs_blkoff(offset);
map->m_plen = inode->i_size - offset;
/* inline data should be located in the same meta block */
@@ -117,8 +113,7 @@ static int erofs_map_blocks_flatmode(struct inode *inode,
return 0;
}
-int erofs_map_blocks(struct inode *inode,
- struct erofs_map_blocks *map, int flags)
+int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map)
{
struct super_block *sb = inode->i_sb;
struct erofs_inode *vi = EROFS_I(inode);
@@ -130,7 +125,7 @@ int erofs_map_blocks(struct inode *inode,
void *kaddr;
int err = 0;
- trace_erofs_map_blocks_enter(inode, map, flags);
+ trace_erofs_map_blocks_enter(inode, map, 0);
map->m_deviceid = 0;
if (map->m_la >= inode->i_size) {
/* leave out-of-bound access unmapped */
@@ -140,7 +135,7 @@ int erofs_map_blocks(struct inode *inode,
}
if (vi->datalayout != EROFS_INODE_CHUNK_BASED) {
- err = erofs_map_blocks_flatmode(inode, map, flags);
+ err = erofs_map_blocks_flatmode(inode, map);
goto out;
}
@@ -150,7 +145,7 @@ int erofs_map_blocks(struct inode *inode,
unit = EROFS_BLOCK_MAP_ENTRY_SIZE; /* block map */
chunknr = map->m_la >> vi->chunkbits;
- pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize +
+ pos = ALIGN(erofs_iloc(inode) + vi->inode_isize +
vi->xattr_isize, unit) + unit * chunknr;
kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos), EROFS_KMAP);
@@ -192,7 +187,7 @@ out_unlock:
out:
if (!err)
map->m_llen = map->m_plen;
- trace_erofs_map_blocks_exit(inode, map, flags, 0);
+ trace_erofs_map_blocks_exit(inode, map, 0, err);
return err;
}
@@ -255,7 +250,7 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
map.m_la = offset;
map.m_llen = length;
- ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
+ ret = erofs_map_blocks(inode, &map);
if (ret < 0)
return ret;
diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c
index ecf28f66b97d..6970b09b8307 100644
--- a/fs/erofs/dir.c
+++ b/fs/erofs/dir.c
@@ -6,21 +6,6 @@
*/
#include "internal.h"
-static void debug_one_dentry(unsigned char d_type, const char *de_name,
- unsigned int de_namelen)
-{
-#ifdef CONFIG_EROFS_FS_DEBUG
- /* since the on-disk name could not have the trailing '\0' */
- unsigned char dbg_namebuf[EROFS_NAME_LEN + 1];
-
- memcpy(dbg_namebuf, de_name, de_namelen);
- dbg_namebuf[de_namelen] = '\0';
-
- erofs_dbg("found dirent %s de_len %u d_type %d", dbg_namebuf,
- de_namelen, d_type);
-#endif
-}
-
static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx,
void *dentry_blk, struct erofs_dirent *de,
unsigned int nameoff, unsigned int maxsize)
@@ -52,10 +37,8 @@ static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx,
return -EFSCORRUPTED;
}
- debug_one_dentry(d_type, de_name, de_namelen);
if (!dir_emit(ctx, de_name, de_namelen,
le64_to_cpu(de->nid), d_type))
- /* stopped by some reason */
return 1;
++de;
ctx->pos += sizeof(struct erofs_dirent);
diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
index 014e20962376..96a87c023128 100644
--- a/fs/erofs/fscache.c
+++ b/fs/erofs/fscache.c
@@ -9,6 +9,7 @@
static DEFINE_MUTEX(erofs_domain_list_lock);
static DEFINE_MUTEX(erofs_domain_cookies_lock);
static LIST_HEAD(erofs_domain_list);
+static LIST_HEAD(erofs_domain_cookies_list);
static struct vfsmount *erofs_pseudo_mnt;
struct erofs_fscache_request {
@@ -164,18 +165,8 @@ static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie,
static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
{
int ret;
- struct super_block *sb = folio_mapping(folio)->host->i_sb;
+ struct erofs_fscache *ctx = folio_mapping(folio)->host->i_private;
struct erofs_fscache_request *req;
- struct erofs_map_dev mdev = {
- .m_deviceid = 0,
- .m_pa = folio_pos(folio),
- };
-
- ret = erofs_map_dev(sb, &mdev);
- if (ret) {
- folio_unlock(folio);
- return ret;
- }
req = erofs_fscache_req_alloc(folio_mapping(folio),
folio_pos(folio), folio_size(folio));
@@ -184,8 +175,8 @@ static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
return PTR_ERR(req);
}
- ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
- req, mdev.m_pa, folio_size(folio));
+ ret = erofs_fscache_read_folios_async(ctx->cookie, req,
+ folio_pos(folio), folio_size(folio));
if (ret)
req->error = ret;
@@ -207,7 +198,7 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary)
int ret;
map.m_la = pos;
- ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
+ ret = erofs_map_blocks(inode, &map);
if (ret)
return ret;
@@ -328,8 +319,6 @@ const struct address_space_operations erofs_fscache_access_aops = {
static void erofs_fscache_domain_put(struct erofs_domain *domain)
{
- if (!domain)
- return;
mutex_lock(&erofs_domain_list_lock);
if (refcount_dec_and_test(&domain->ref)) {
list_del(&domain->list);
@@ -337,8 +326,8 @@ static void erofs_fscache_domain_put(struct erofs_domain *domain)
kern_unmount(erofs_pseudo_mnt);
erofs_pseudo_mnt = NULL;
}
- mutex_unlock(&erofs_domain_list_lock);
fscache_relinquish_volume(domain->volume, NULL, false);
+ mutex_unlock(&erofs_domain_list_lock);
kfree(domain->domain_id);
kfree(domain);
return;
@@ -431,19 +420,21 @@ static int erofs_fscache_register_domain(struct super_block *sb)
return err;
}
-static
-struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
- char *name,
- unsigned int flags)
+static struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
+ char *name, unsigned int flags)
{
struct fscache_volume *volume = EROFS_SB(sb)->volume;
struct erofs_fscache *ctx;
struct fscache_cookie *cookie;
+ struct super_block *isb;
+ struct inode *inode;
int ret;
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return ERR_PTR(-ENOMEM);
+ INIT_LIST_HEAD(&ctx->node);
+ refcount_set(&ctx->ref, 1);
cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE,
name, strlen(name), NULL, 0, 0);
@@ -452,32 +443,32 @@ struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
ret = -EINVAL;
goto err;
}
-
fscache_use_cookie(cookie, false);
- ctx->cookie = cookie;
-
- if (flags & EROFS_REG_COOKIE_NEED_INODE) {
- struct inode *const inode = new_inode(sb);
-
- if (!inode) {
- erofs_err(sb, "failed to get anon inode for %s", name);
- ret = -ENOMEM;
- goto err_cookie;
- }
- set_nlink(inode, 1);
- inode->i_size = OFFSET_MAX;
- inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
- mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
-
- ctx->inode = inode;
+ /*
+ * Allocate anonymous inode in global pseudo mount for shareable blobs,
+ * so that they are accessible among erofs fs instances.
+ */
+ isb = flags & EROFS_REG_COOKIE_SHARE ? erofs_pseudo_mnt->mnt_sb : sb;
+ inode = new_inode(isb);
+ if (!inode) {
+ erofs_err(sb, "failed to get anon inode for %s", name);
+ ret = -ENOMEM;
+ goto err_cookie;
}
+ inode->i_size = OFFSET_MAX;
+ inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
+ mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
+ inode->i_private = ctx;
+
+ ctx->cookie = cookie;
+ ctx->inode = inode;
return ctx;
err_cookie:
- fscache_unuse_cookie(ctx->cookie, NULL, NULL);
- fscache_relinquish_cookie(ctx->cookie, false);
+ fscache_unuse_cookie(cookie, NULL, NULL);
+ fscache_relinquish_cookie(cookie, false);
err:
kfree(ctx);
return ERR_PTR(ret);
@@ -492,13 +483,9 @@ static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx)
kfree(ctx);
}
-static
-struct erofs_fscache *erofs_fscache_domain_init_cookie(struct super_block *sb,
- char *name,
- unsigned int flags)
+static struct erofs_fscache *erofs_domain_init_cookie(struct super_block *sb,
+ char *name, unsigned int flags)
{
- int err;
- struct inode *inode;
struct erofs_fscache *ctx;
struct erofs_domain *domain = EROFS_SB(sb)->domain;
@@ -508,55 +495,38 @@ struct erofs_fscache *erofs_fscache_domain_init_cookie(struct super_block *sb,
ctx->name = kstrdup(name, GFP_KERNEL);
if (!ctx->name) {
- err = -ENOMEM;
- goto out;
- }
-
- inode = new_inode(erofs_pseudo_mnt->mnt_sb);
- if (!inode) {
- err = -ENOMEM;
- goto out;
+ erofs_fscache_relinquish_cookie(ctx);
+ return ERR_PTR(-ENOMEM);
}
- ctx->domain = domain;
- ctx->anon_inode = inode;
- inode->i_private = ctx;
refcount_inc(&domain->ref);
+ ctx->domain = domain;
+ list_add(&ctx->node, &erofs_domain_cookies_list);
return ctx;
-out:
- erofs_fscache_relinquish_cookie(ctx);
- return ERR_PTR(err);
}
-static
-struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
- char *name,
- unsigned int flags)
+static struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
+ char *name, unsigned int flags)
{
- struct inode *inode;
struct erofs_fscache *ctx;
struct erofs_domain *domain = EROFS_SB(sb)->domain;
- struct super_block *psb = erofs_pseudo_mnt->mnt_sb;
+ flags |= EROFS_REG_COOKIE_SHARE;
mutex_lock(&erofs_domain_cookies_lock);
- spin_lock(&psb->s_inode_list_lock);
- list_for_each_entry(inode, &psb->s_inodes, i_sb_list) {
- ctx = inode->i_private;
- if (!ctx || ctx->domain != domain || strcmp(ctx->name, name))
+ list_for_each_entry(ctx, &erofs_domain_cookies_list, node) {
+ if (ctx->domain != domain || strcmp(ctx->name, name))
continue;
if (!(flags & EROFS_REG_COOKIE_NEED_NOEXIST)) {
- igrab(inode);
+ refcount_inc(&ctx->ref);
} else {
erofs_err(sb, "%s already exists in domain %s", name,
domain->domain_id);
ctx = ERR_PTR(-EEXIST);
}
- spin_unlock(&psb->s_inode_list_lock);
mutex_unlock(&erofs_domain_cookies_lock);
return ctx;
}
- spin_unlock(&psb->s_inode_list_lock);
- ctx = erofs_fscache_domain_init_cookie(sb, name, flags);
+ ctx = erofs_domain_init_cookie(sb, name, flags);
mutex_unlock(&erofs_domain_cookies_lock);
return ctx;
}
@@ -572,23 +542,22 @@ struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx)
{
- bool drop;
- struct erofs_domain *domain;
+ struct erofs_domain *domain = NULL;
if (!ctx)
return;
- domain = ctx->domain;
- if (domain) {
- mutex_lock(&erofs_domain_cookies_lock);
- drop = atomic_read(&ctx->anon_inode->i_count) == 1;
- iput(ctx->anon_inode);
- mutex_unlock(&erofs_domain_cookies_lock);
- if (!drop)
- return;
- }
+ if (!ctx->domain)
+ return erofs_fscache_relinquish_cookie(ctx);
- erofs_fscache_relinquish_cookie(ctx);
- erofs_fscache_domain_put(domain);
+ mutex_lock(&erofs_domain_cookies_lock);
+ if (refcount_dec_and_test(&ctx->ref)) {
+ domain = ctx->domain;
+ list_del(&ctx->node);
+ erofs_fscache_relinquish_cookie(ctx);
+ }
+ mutex_unlock(&erofs_domain_cookies_lock);
+ if (domain)
+ erofs_fscache_domain_put(domain);
}
int erofs_fscache_register_fs(struct super_block *sb)
@@ -596,7 +565,7 @@ int erofs_fscache_register_fs(struct super_block *sb)
int ret;
struct erofs_sb_info *sbi = EROFS_SB(sb);
struct erofs_fscache *fscache;
- unsigned int flags;
+ unsigned int flags = 0;
if (sbi->domain_id)
ret = erofs_fscache_register_domain(sb);
@@ -615,7 +584,6 @@ int erofs_fscache_register_fs(struct super_block *sb)
*
* Acquired domain/volume will be relinquished in kill_sb() on error.
*/
- flags = EROFS_REG_COOKIE_NEED_INODE;
if (sbi->domain_id)
flags |= EROFS_REG_COOKIE_NEED_NOEXIST;
fscache = erofs_fscache_register_cookie(sb, sbi->fsid, flags);
diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
index d3b8736fa124..4be7dda3cd24 100644
--- a/fs/erofs/inode.c
+++ b/fs/erofs/inode.c
@@ -14,7 +14,7 @@ static void *erofs_read_inode(struct erofs_buf *buf,
struct super_block *sb = inode->i_sb;
struct erofs_sb_info *sbi = EROFS_SB(sb);
struct erofs_inode *vi = EROFS_I(inode);
- const erofs_off_t inode_loc = iloc(sbi, vi->nid);
+ const erofs_off_t inode_loc = erofs_iloc(inode);
erofs_blk_t blkaddr, nblks = 0;
void *kaddr;
@@ -308,52 +308,54 @@ out_unlock:
}
/*
- * erofs nid is 64bits, but i_ino is 'unsigned long', therefore
- * we should do more for 32-bit platform to find the right inode.
+ * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down
+ * so that it will fit.
*/
-static int erofs_ilookup_test_actor(struct inode *inode, void *opaque)
+static ino_t erofs_squash_ino(erofs_nid_t nid)
{
- const erofs_nid_t nid = *(erofs_nid_t *)opaque;
+ ino_t ino = (ino_t)nid;
+
+ if (sizeof(ino_t) < sizeof(erofs_nid_t))
+ ino ^= nid >> (sizeof(erofs_nid_t) - sizeof(ino_t)) * 8;
+ return ino;
+}
- return EROFS_I(inode)->nid == nid;
+static int erofs_iget5_eq(struct inode *inode, void *opaque)
+{
+ return EROFS_I(inode)->nid == *(erofs_nid_t *)opaque;
}
-static int erofs_iget_set_actor(struct inode *inode, void *opaque)
+static int erofs_iget5_set(struct inode *inode, void *opaque)
{
const erofs_nid_t nid = *(erofs_nid_t *)opaque;
- inode->i_ino = erofs_inode_hash(nid);
+ inode->i_ino = erofs_squash_ino(nid);
+ EROFS_I(inode)->nid = nid;
return 0;
}
struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid)
{
- const unsigned long hashval = erofs_inode_hash(nid);
struct inode *inode;
- inode = iget5_locked(sb, hashval, erofs_ilookup_test_actor,
- erofs_iget_set_actor, &nid);
+ inode = iget5_locked(sb, erofs_squash_ino(nid), erofs_iget5_eq,
+ erofs_iget5_set, &nid);
if (!inode)
return ERR_PTR(-ENOMEM);
if (inode->i_state & I_NEW) {
- int err;
- struct erofs_inode *vi = EROFS_I(inode);
-
- vi->nid = nid;
+ int err = erofs_fill_inode(inode);
- err = erofs_fill_inode(inode);
- if (!err) {
- unlock_new_inode(inode);
- } else {
+ if (err) {
iget_failed(inode);
- inode = ERR_PTR(err);
+ return ERR_PTR(err);
}
+ unlock_new_inode(inode);
}
return inode;
}
-int erofs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+int erofs_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask,
unsigned int query_flags)
{
@@ -366,7 +368,7 @@ int erofs_getattr(struct user_namespace *mnt_userns, const struct path *path,
stat->attributes_mask |= (STATX_ATTR_COMPRESSED |
STATX_ATTR_IMMUTABLE);
- generic_fillattr(mnt_userns, inode, stat);
+ generic_fillattr(idmap, inode, stat);
return 0;
}
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index bb8501c0ff5b..3f3561d37d1b 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -12,7 +12,6 @@
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/bio.h>
-#include <linux/buffer_head.h>
#include <linux/magic.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
@@ -108,9 +107,12 @@ struct erofs_domain {
struct erofs_fscache {
struct fscache_cookie *cookie;
- struct inode *inode;
- struct inode *anon_inode;
+ struct inode *inode; /* anonymous inode for the blob */
+
+ /* used for share domain mode */
struct erofs_domain *domain;
+ struct list_head node;
+ refcount_t ref;
char *name;
};
@@ -271,11 +273,6 @@ struct erofs_buf {
#define erofs_blkoff(addr) ((addr) % EROFS_BLKSIZ)
#define blknr_to_addr(nr) ((erofs_off_t)(nr) * EROFS_BLKSIZ)
-static inline erofs_off_t iloc(struct erofs_sb_info *sbi, erofs_nid_t nid)
-{
- return blknr_to_addr(sbi->meta_blkaddr) + (nid << sbi->islotbits);
-}
-
#define EROFS_FEATURE_FUNCS(name, compat, feature) \
static inline bool erofs_sb_has_##name(struct erofs_sb_info *sbi) \
{ \
@@ -340,13 +337,14 @@ struct erofs_inode {
struct inode vfs_inode;
};
-#define EROFS_I(ptr) \
- container_of(ptr, struct erofs_inode, vfs_inode)
+#define EROFS_I(ptr) container_of(ptr, struct erofs_inode, vfs_inode)
-static inline unsigned long erofs_inode_datablocks(struct inode *inode)
+static inline erofs_off_t erofs_iloc(struct inode *inode)
{
- /* since i_size cannot be changed */
- return DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
+ struct erofs_sb_info *sbi = EROFS_I_SB(inode);
+
+ return blknr_to_addr(sbi->meta_blkaddr) +
+ (EROFS_I(inode)->nid << sbi->islotbits);
}
static inline unsigned int erofs_bitrange(unsigned int value, unsigned int bit,
@@ -382,31 +380,18 @@ struct page *erofs_grab_cache_page_nowait(struct address_space *mapping,
readahead_gfp_mask(mapping) & ~__GFP_RECLAIM);
}
-extern const struct super_operations erofs_sops;
-extern struct file_system_type erofs_fs_type;
-
-extern const struct address_space_operations erofs_raw_access_aops;
-extern const struct address_space_operations z_erofs_aops;
-
-enum {
- BH_Encoded = BH_PrivateStart,
- BH_FullMapped,
- BH_Fragment,
- BH_Partialref,
-};
-
/* Has a disk mapping */
-#define EROFS_MAP_MAPPED (1 << BH_Mapped)
+#define EROFS_MAP_MAPPED 0x0001
/* Located in metadata (could be copied from bd_inode) */
-#define EROFS_MAP_META (1 << BH_Meta)
+#define EROFS_MAP_META 0x0002
/* The extent is encoded */
-#define EROFS_MAP_ENCODED (1 << BH_Encoded)
+#define EROFS_MAP_ENCODED 0x0004
/* The length of extent is full */
-#define EROFS_MAP_FULL_MAPPED (1 << BH_FullMapped)
+#define EROFS_MAP_FULL_MAPPED 0x0008
/* Located in the special packed inode */
-#define EROFS_MAP_FRAGMENT (1 << BH_Fragment)
+#define EROFS_MAP_FRAGMENT 0x0010
/* The extent refers to partial decompressed data */
-#define EROFS_MAP_PARTIAL_REF (1 << BH_Partialref)
+#define EROFS_MAP_PARTIAL_REF 0x0020
struct erofs_map_blocks {
struct erofs_buf buf;
@@ -419,17 +404,15 @@ struct erofs_map_blocks {
unsigned int m_flags;
};
-/* Flags used by erofs_map_blocks_flatmode() */
-#define EROFS_GET_BLOCKS_RAW 0x0001
/*
* Used to get the exact decompressed length, e.g. fiemap (consider lookback
* approach instead if possible since it's more metadata lightweight.)
*/
-#define EROFS_GET_BLOCKS_FIEMAP 0x0002
+#define EROFS_GET_BLOCKS_FIEMAP 0x0001
/* Used to map the whole extent if non-negligible data is requested for LZMA */
-#define EROFS_GET_BLOCKS_READMORE 0x0004
+#define EROFS_GET_BLOCKS_READMORE 0x0002
/* Used to map tail extent for tailpacking inline or fragment pcluster */
-#define EROFS_GET_BLOCKS_FINDTAIL 0x0008
+#define EROFS_GET_BLOCKS_FINDTAIL 0x0004
enum {
Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX,
@@ -437,24 +420,6 @@ enum {
Z_EROFS_COMPRESSION_RUNTIME_MAX
};
-/* zmap.c */
-extern const struct iomap_ops z_erofs_iomap_report_ops;
-
-#ifdef CONFIG_EROFS_FS_ZIP
-int z_erofs_fill_inode(struct inode *inode);
-int z_erofs_map_blocks_iter(struct inode *inode,
- struct erofs_map_blocks *map,
- int flags);
-#else
-static inline int z_erofs_fill_inode(struct inode *inode) { return -EOPNOTSUPP; }
-static inline int z_erofs_map_blocks_iter(struct inode *inode,
- struct erofs_map_blocks *map,
- int flags)
-{
- return -EOPNOTSUPP;
-}
-#endif /* !CONFIG_EROFS_FS_ZIP */
-
struct erofs_map_dev {
struct erofs_fscache *m_fscache;
struct block_device *m_bdev;
@@ -465,8 +430,27 @@ struct erofs_map_dev {
unsigned int m_deviceid;
};
-/* data.c */
+extern struct file_system_type erofs_fs_type;
+extern const struct super_operations erofs_sops;
+
+extern const struct address_space_operations erofs_raw_access_aops;
+extern const struct address_space_operations z_erofs_aops;
+extern const struct address_space_operations erofs_fscache_access_aops;
+
+extern const struct inode_operations erofs_generic_iops;
+extern const struct inode_operations erofs_symlink_iops;
+extern const struct inode_operations erofs_fast_symlink_iops;
+extern const struct inode_operations erofs_dir_iops;
+
extern const struct file_operations erofs_file_fops;
+extern const struct file_operations erofs_dir_fops;
+
+extern const struct iomap_ops z_erofs_iomap_report_ops;
+
+/* flags for erofs_fscache_register_cookie() */
+#define EROFS_REG_COOKIE_SHARE 0x0001
+#define EROFS_REG_COOKIE_NEED_NOEXIST 0x0002
+
void erofs_unmap_metabuf(struct erofs_buf *buf);
void erofs_put_metabuf(struct erofs_buf *buf);
void *erofs_bread(struct erofs_buf *buf, struct inode *inode,
@@ -476,37 +460,14 @@ void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *dev);
int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len);
-int erofs_map_blocks(struct inode *inode,
- struct erofs_map_blocks *map, int flags);
-
-/* inode.c */
-static inline unsigned long erofs_inode_hash(erofs_nid_t nid)
-{
-#if BITS_PER_LONG == 32
- return (nid >> 32) ^ (nid & 0xffffffff);
-#else
- return nid;
-#endif
-}
-
-extern const struct inode_operations erofs_generic_iops;
-extern const struct inode_operations erofs_symlink_iops;
-extern const struct inode_operations erofs_fast_symlink_iops;
-
+int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map);
struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid);
-int erofs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+int erofs_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask,
unsigned int query_flags);
-
-/* namei.c */
-extern const struct inode_operations erofs_dir_iops;
-
int erofs_namei(struct inode *dir, const struct qstr *name,
erofs_nid_t *nid, unsigned int *d_type);
-/* dir.c */
-extern const struct file_operations erofs_dir_fops;
-
static inline void *erofs_vm_map_ram(struct page **pages, unsigned int count)
{
int retried = 0;
@@ -522,23 +483,19 @@ static inline void *erofs_vm_map_ram(struct page **pages, unsigned int count)
return NULL;
}
-/* pcpubuf.c */
void *erofs_get_pcpubuf(unsigned int requiredpages);
void erofs_put_pcpubuf(void *ptr);
int erofs_pcpubuf_growsize(unsigned int nrpages);
void erofs_pcpubuf_init(void);
void erofs_pcpubuf_exit(void);
-/* sysfs.c */
int erofs_register_sysfs(struct super_block *sb);
void erofs_unregister_sysfs(struct super_block *sb);
int __init erofs_init_sysfs(void);
void erofs_exit_sysfs(void);
-/* utils.c / zdata.c */
struct page *erofs_allocpage(struct page **pagepool, gfp_t gfp);
-static inline void erofs_pagepool_add(struct page **pagepool,
- struct page *page)
+static inline void erofs_pagepool_add(struct page **pagepool, struct page *page)
{
set_page_private(page, (unsigned long)*pagepool);
*pagepool = page;
@@ -564,6 +521,9 @@ int erofs_try_to_free_cached_page(struct page *page);
int z_erofs_load_lz4_config(struct super_block *sb,
struct erofs_super_block *dsb,
struct z_erofs_lz4_cfgs *lz4, int len);
+int z_erofs_fill_inode(struct inode *inode);
+int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map,
+ int flags);
#else
static inline void erofs_shrinker_register(struct super_block *sb) {}
static inline void erofs_shrinker_unregister(struct super_block *sb) {}
@@ -581,6 +541,7 @@ static inline int z_erofs_load_lz4_config(struct super_block *sb,
}
return 0;
}
+static inline int z_erofs_fill_inode(struct inode *inode) { return -EOPNOTSUPP; }
#endif /* !CONFIG_EROFS_FS_ZIP */
#ifdef CONFIG_EROFS_FS_ZIP_LZMA
@@ -601,23 +562,15 @@ static inline int z_erofs_load_lzma_config(struct super_block *sb,
}
return 0;
}
-#endif /* !CONFIG_EROFS_FS_ZIP */
+#endif /* !CONFIG_EROFS_FS_ZIP_LZMA */
-/* flags for erofs_fscache_register_cookie() */
-#define EROFS_REG_COOKIE_NEED_INODE 1
-#define EROFS_REG_COOKIE_NEED_NOEXIST 2
-
-/* fscache.c */
#ifdef CONFIG_EROFS_FS_ONDEMAND
int erofs_fscache_register_fs(struct super_block *sb);
void erofs_fscache_unregister_fs(struct super_block *sb);
struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
- char *name,
- unsigned int flags);
+ char *name, unsigned int flags);
void erofs_fscache_unregister_cookie(struct erofs_fscache *fscache);
-
-extern const struct address_space_operations erofs_fscache_access_aops;
#else
static inline int erofs_fscache_register_fs(struct super_block *sb)
{
@@ -627,8 +580,7 @@ static inline void erofs_fscache_unregister_fs(struct super_block *sb) {}
static inline
struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
- char *name,
- unsigned int flags)
+ char *name, unsigned int flags)
{
return ERR_PTR(-EOPNOTSUPP);
}
diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c
index b64a108fac92..966eabc61c13 100644
--- a/fs/erofs/namei.c
+++ b/fs/erofs/namei.c
@@ -5,7 +5,6 @@
* Copyright (C) 2022, Alibaba Cloud
*/
#include "xattr.h"
-
#include <trace/events/erofs.h>
struct erofs_qstr {
@@ -87,19 +86,13 @@ static struct erofs_dirent *find_target_dirent(struct erofs_qstr *name,
return ERR_PTR(-ENOENT);
}
-static void *find_target_block_classic(struct erofs_buf *target,
- struct inode *dir,
- struct erofs_qstr *name,
- int *_ndirents)
+static void *erofs_find_target_block(struct erofs_buf *target,
+ struct inode *dir, struct erofs_qstr *name, int *_ndirents)
{
- unsigned int startprfx, endprfx;
- int head, back;
+ int head = 0, back = DIV_ROUND_UP(dir->i_size, EROFS_BLKSIZ) - 1;
+ unsigned int startprfx = 0, endprfx = 0;
void *candidate = ERR_PTR(-ENOENT);
- startprfx = endprfx = 0;
- head = 0;
- back = erofs_inode_datablocks(dir) - 1;
-
while (head <= back) {
const int mid = head + (back - head) / 2;
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
@@ -180,8 +173,7 @@ int erofs_namei(struct inode *dir, const struct qstr *name, erofs_nid_t *nid,
qn.end = name->name + name->len;
ndirents = 0;
-
- de = find_target_block_classic(&buf, dir, &qn, &ndirents);
+ de = erofs_find_target_block(&buf, dir, &qn, &ndirents);
if (IS_ERR(de))
return PTR_ERR(de);
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 481788c24a68..19b1ae79cec4 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -5,7 +5,6 @@
* Copyright (C) 2021, Alibaba Cloud
*/
#include <linux/module.h>
-#include <linux/buffer_head.h>
#include <linux/statfs.h>
#include <linux/parser.h>
#include <linux/seq_file.h>
@@ -577,26 +576,25 @@ static int erofs_fc_parse_param(struct fs_context *fc,
}
++ctx->devs->extra_devices;
break;
- case Opt_fsid:
#ifdef CONFIG_EROFS_FS_ONDEMAND
+ case Opt_fsid:
kfree(ctx->fsid);
ctx->fsid = kstrdup(param->string, GFP_KERNEL);
if (!ctx->fsid)
return -ENOMEM;
-#else
- errorfc(fc, "fsid option not supported");
-#endif
break;
case Opt_domain_id:
-#ifdef CONFIG_EROFS_FS_ONDEMAND
kfree(ctx->domain_id);
ctx->domain_id = kstrdup(param->string, GFP_KERNEL);
if (!ctx->domain_id)
return -ENOMEM;
+ break;
#else
- errorfc(fc, "domain_id option not supported");
-#endif
+ case Opt_fsid:
+ case Opt_domain_id:
+ errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name);
break;
+#endif
default:
return -ENOPARAM;
}
@@ -970,6 +968,8 @@ static void erofs_put_super(struct super_block *sb)
iput(sbi->packed_inode);
sbi->packed_inode = NULL;
#endif
+ erofs_free_dev_context(sbi->devs);
+ sbi->devs = NULL;
erofs_fscache_unregister_fs(sb);
}
diff --git a/fs/erofs/sysfs.c b/fs/erofs/sysfs.c
index fd476961f742..435e515c0792 100644
--- a/fs/erofs/sysfs.c
+++ b/fs/erofs/sysfs.c
@@ -179,13 +179,13 @@ static const struct sysfs_ops erofs_attr_ops = {
.store = erofs_attr_store,
};
-static struct kobj_type erofs_sb_ktype = {
+static const struct kobj_type erofs_sb_ktype = {
.default_groups = erofs_groups,
.sysfs_ops = &erofs_attr_ops,
.release = erofs_sb_release,
};
-static struct kobj_type erofs_ktype = {
+static const struct kobj_type erofs_ktype = {
.sysfs_ops = &erofs_attr_ops,
};
@@ -193,7 +193,7 @@ static struct kset erofs_root = {
.kobj = {.ktype = &erofs_ktype},
};
-static struct kobj_type erofs_feat_ktype = {
+static const struct kobj_type erofs_feat_ktype = {
.default_groups = erofs_feat_groups,
.sysfs_ops = &erofs_attr_ops,
};
diff --git a/fs/erofs/tagptr.h b/fs/erofs/tagptr.h
deleted file mode 100644
index 64ceb7270b5c..000000000000
--- a/fs/erofs/tagptr.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * A tagged pointer implementation
- */
-#ifndef __EROFS_FS_TAGPTR_H
-#define __EROFS_FS_TAGPTR_H
-
-#include <linux/types.h>
-#include <linux/build_bug.h>
-
-/*
- * the name of tagged pointer types are tagptr{1, 2, 3...}_t
- * avoid directly using the internal structs __tagptr{1, 2, 3...}
- */
-#define __MAKE_TAGPTR(n) \
-typedef struct __tagptr##n { \
- uintptr_t v; \
-} tagptr##n##_t;
-
-__MAKE_TAGPTR(1)
-__MAKE_TAGPTR(2)
-__MAKE_TAGPTR(3)
-__MAKE_TAGPTR(4)
-
-#undef __MAKE_TAGPTR
-
-extern void __compiletime_error("bad tagptr tags")
- __bad_tagptr_tags(void);
-
-extern void __compiletime_error("bad tagptr type")
- __bad_tagptr_type(void);
-
-/* fix the broken usage of "#define tagptr2_t tagptr3_t" by users */
-#define __tagptr_mask_1(ptr, n) \
- __builtin_types_compatible_p(typeof(ptr), struct __tagptr##n) ? \
- (1UL << (n)) - 1 :
-
-#define __tagptr_mask(ptr) (\
- __tagptr_mask_1(ptr, 1) ( \
- __tagptr_mask_1(ptr, 2) ( \
- __tagptr_mask_1(ptr, 3) ( \
- __tagptr_mask_1(ptr, 4) ( \
- __bad_tagptr_type(), 0)))))
-
-/* generate a tagged pointer from a raw value */
-#define tagptr_init(type, val) \
- ((typeof(type)){ .v = (uintptr_t)(val) })
-
-/*
- * directly cast a tagged pointer to the native pointer type, which
- * could be used for backward compatibility of existing code.
- */
-#define tagptr_cast_ptr(tptr) ((void *)(tptr).v)
-
-/* encode tagged pointers */
-#define tagptr_fold(type, ptr, _tags) ({ \
- const typeof(_tags) tags = (_tags); \
- if (__builtin_constant_p(tags) && (tags & ~__tagptr_mask(type))) \
- __bad_tagptr_tags(); \
-tagptr_init(type, (uintptr_t)(ptr) | tags); })
-
-/* decode tagged pointers */
-#define tagptr_unfold_ptr(tptr) \
- ((void *)((tptr).v & ~__tagptr_mask(tptr)))
-
-#define tagptr_unfold_tags(tptr) \
- ((tptr).v & __tagptr_mask(tptr))
-
-/* operations for the tagger pointer */
-#define tagptr_eq(_tptr1, _tptr2) ({ \
- typeof(_tptr1) tptr1 = (_tptr1); \
- typeof(_tptr2) tptr2 = (_tptr2); \
- (void)(&tptr1 == &tptr2); \
-(tptr1).v == (tptr2).v; })
-
-/* lock-free CAS operation */
-#define tagptr_cmpxchg(_ptptr, _o, _n) ({ \
- typeof(_ptptr) ptptr = (_ptptr); \
- typeof(_o) o = (_o); \
- typeof(_n) n = (_n); \
- (void)(&o == &n); \
- (void)(&o == ptptr); \
-tagptr_init(o, cmpxchg(&ptptr->v, o.v, n.v)); })
-
-/* wrap WRITE_ONCE if atomic update is needed */
-#define tagptr_replace_tags(_ptptr, tags) ({ \
- typeof(_ptptr) ptptr = (_ptptr); \
- *ptptr = tagptr_fold(*ptptr, tagptr_unfold_ptr(*ptptr), tags); \
-*ptptr; })
-
-#define tagptr_set_tags(_ptptr, _tags) ({ \
- typeof(_ptptr) ptptr = (_ptptr); \
- const typeof(_tags) tags = (_tags); \
- if (__builtin_constant_p(tags) && (tags & ~__tagptr_mask(*ptptr))) \
- __bad_tagptr_tags(); \
- ptptr->v |= tags; \
-*ptptr; })
-
-#define tagptr_clear_tags(_ptptr, _tags) ({ \
- typeof(_ptptr) ptptr = (_ptptr); \
- const typeof(_tags) tags = (_tags); \
- if (__builtin_constant_p(tags) && (tags & ~__tagptr_mask(*ptptr))) \
- __bad_tagptr_tags(); \
- ptptr->v &= ~tags; \
-*ptptr; })
-
-#endif /* __EROFS_FS_TAGPTR_H */
diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c
index a62fb8a3318a..60729b1220b6 100644
--- a/fs/erofs/xattr.c
+++ b/fs/erofs/xattr.c
@@ -22,8 +22,7 @@ static int init_inode_xattrs(struct inode *inode)
struct xattr_iter it;
unsigned int i;
struct erofs_xattr_ibody_header *ih;
- struct super_block *sb;
- struct erofs_sb_info *sbi;
+ struct super_block *sb = inode->i_sb;
int ret = 0;
/* the most case is that xattrs of this inode are initialized. */
@@ -52,15 +51,14 @@ static int init_inode_xattrs(struct inode *inode)
* undefined right now (maybe use later with some new sb feature).
*/
if (vi->xattr_isize == sizeof(struct erofs_xattr_ibody_header)) {
- erofs_err(inode->i_sb,
+ erofs_err(sb,
"xattr_isize %d of nid %llu is not supported yet",
vi->xattr_isize, vi->nid);
ret = -EOPNOTSUPP;
goto out_unlock;
} else if (vi->xattr_isize < sizeof(struct erofs_xattr_ibody_header)) {
if (vi->xattr_isize) {
- erofs_err(inode->i_sb,
- "bogus xattr ibody @ nid %llu", vi->nid);
+ erofs_err(sb, "bogus xattr ibody @ nid %llu", vi->nid);
DBG_BUGON(1);
ret = -EFSCORRUPTED;
goto out_unlock; /* xattr ondisk layout error */
@@ -69,11 +67,9 @@ static int init_inode_xattrs(struct inode *inode)
goto out_unlock;
}
- sb = inode->i_sb;
- sbi = EROFS_SB(sb);
it.buf = __EROFS_BUF_INITIALIZER;
- it.blkaddr = erofs_blknr(iloc(sbi, vi->nid) + vi->inode_isize);
- it.ofs = erofs_blkoff(iloc(sbi, vi->nid) + vi->inode_isize);
+ it.blkaddr = erofs_blknr(erofs_iloc(inode) + vi->inode_isize);
+ it.ofs = erofs_blkoff(erofs_iloc(inode) + vi->inode_isize);
/* read in shared xattr array (non-atomic, see kmalloc below) */
it.kaddr = erofs_read_metabuf(&it.buf, sb, it.blkaddr, EROFS_KMAP);
@@ -159,7 +155,6 @@ static int inline_xattr_iter_begin(struct xattr_iter *it,
struct inode *inode)
{
struct erofs_inode *const vi = EROFS_I(inode);
- struct erofs_sb_info *const sbi = EROFS_SB(inode->i_sb);
unsigned int xattr_header_sz, inline_xattr_ofs;
xattr_header_sz = inlinexattr_header_size(inode);
@@ -170,9 +165,8 @@ static int inline_xattr_iter_begin(struct xattr_iter *it,
inline_xattr_ofs = vi->inode_isize + xattr_header_sz;
- it->blkaddr = erofs_blknr(iloc(sbi, vi->nid) + inline_xattr_ofs);
- it->ofs = erofs_blkoff(iloc(sbi, vi->nid) + inline_xattr_ofs);
-
+ it->blkaddr = erofs_blknr(erofs_iloc(inode) + inline_xattr_ofs);
+ it->ofs = erofs_blkoff(erofs_iloc(inode) + inline_xattr_ofs);
it->kaddr = erofs_read_metabuf(&it->buf, inode->i_sb, it->blkaddr,
EROFS_KMAP);
if (IS_ERR(it->kaddr))
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index ccf7c55d477f..3247d2422bea 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -4,13 +4,178 @@
* https://www.huawei.com/
* Copyright (C) 2022 Alibaba Cloud
*/
-#include "zdata.h"
#include "compress.h"
#include <linux/prefetch.h>
#include <linux/psi.h>
-
+#include <linux/cpuhotplug.h>
#include <trace/events/erofs.h>
+#define Z_EROFS_PCLUSTER_MAX_PAGES (Z_EROFS_PCLUSTER_MAX_SIZE / PAGE_SIZE)
+#define Z_EROFS_INLINE_BVECS 2
+
+/*
+ * let's leave a type here in case of introducing
+ * another tagged pointer later.
+ */
+typedef void *z_erofs_next_pcluster_t;
+
+struct z_erofs_bvec {
+ struct page *page;
+ int offset;
+ unsigned int end;
+};
+
+#define __Z_EROFS_BVSET(name, total) \
+struct name { \
+ /* point to the next page which contains the following bvecs */ \
+ struct page *nextpage; \
+ struct z_erofs_bvec bvec[total]; \
+}
+__Z_EROFS_BVSET(z_erofs_bvset,);
+__Z_EROFS_BVSET(z_erofs_bvset_inline, Z_EROFS_INLINE_BVECS);
+
+/*
+ * Structure fields follow one of the following exclusion rules.
+ *
+ * I: Modifiable by initialization/destruction paths and read-only
+ * for everyone else;
+ *
+ * L: Field should be protected by the pcluster lock;
+ *
+ * A: Field should be accessed / updated in atomic for parallelized code.
+ */
+struct z_erofs_pcluster {
+ struct erofs_workgroup obj;
+ struct mutex lock;
+
+ /* A: point to next chained pcluster or TAILs */
+ z_erofs_next_pcluster_t next;
+
+ /* L: the maximum decompression size of this round */
+ unsigned int length;
+
+ /* L: total number of bvecs */
+ unsigned int vcnt;
+
+ /* I: page offset of start position of decompression */
+ unsigned short pageofs_out;
+
+ /* I: page offset of inline compressed data */
+ unsigned short pageofs_in;
+
+ union {
+ /* L: inline a certain number of bvec for bootstrap */
+ struct z_erofs_bvset_inline bvset;
+
+ /* I: can be used to free the pcluster by RCU. */
+ struct rcu_head rcu;
+ };
+
+ union {
+ /* I: physical cluster size in pages */
+ unsigned short pclusterpages;
+
+ /* I: tailpacking inline compressed size */
+ unsigned short tailpacking_size;
+ };
+
+ /* I: compression algorithm format */
+ unsigned char algorithmformat;
+
+ /* L: whether partial decompression or not */
+ bool partial;
+
+ /* L: indicate several pageofs_outs or not */
+ bool multibases;
+
+ /* A: compressed bvecs (can be cached or inplaced pages) */
+ struct z_erofs_bvec compressed_bvecs[];
+};
+
+/* let's avoid the valid 32-bit kernel addresses */
+
+/* the chained workgroup has't submitted io (still open) */
+#define Z_EROFS_PCLUSTER_TAIL ((void *)0x5F0ECAFE)
+/* the chained workgroup has already submitted io */
+#define Z_EROFS_PCLUSTER_TAIL_CLOSED ((void *)0x5F0EDEAD)
+
+#define Z_EROFS_PCLUSTER_NIL (NULL)
+
+struct z_erofs_decompressqueue {
+ struct super_block *sb;
+ atomic_t pending_bios;
+ z_erofs_next_pcluster_t head;
+
+ union {
+ struct completion done;
+ struct work_struct work;
+ struct kthread_work kthread_work;
+ } u;
+ bool eio, sync;
+};
+
+static inline bool z_erofs_is_inline_pcluster(struct z_erofs_pcluster *pcl)
+{
+ return !pcl->obj.index;
+}
+
+static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl)
+{
+ if (z_erofs_is_inline_pcluster(pcl))
+ return 1;
+ return pcl->pclusterpages;
+}
+
+/*
+ * bit 30: I/O error occurred on this page
+ * bit 0 - 29: remaining parts to complete this page
+ */
+#define Z_EROFS_PAGE_EIO (1 << 30)
+
+static inline void z_erofs_onlinepage_init(struct page *page)
+{
+ union {
+ atomic_t o;
+ unsigned long v;
+ } u = { .o = ATOMIC_INIT(1) };
+
+ set_page_private(page, u.v);
+ smp_wmb();
+ SetPagePrivate(page);
+}
+
+static inline void z_erofs_onlinepage_split(struct page *page)
+{
+ atomic_inc((atomic_t *)&page->private);
+}
+
+static inline void z_erofs_page_mark_eio(struct page *page)
+{
+ int orig;
+
+ do {
+ orig = atomic_read((atomic_t *)&page->private);
+ } while (atomic_cmpxchg((atomic_t *)&page->private, orig,
+ orig | Z_EROFS_PAGE_EIO) != orig);
+}
+
+static inline void z_erofs_onlinepage_endio(struct page *page)
+{
+ unsigned int v;
+
+ DBG_BUGON(!PagePrivate(page));
+ v = atomic_dec_return((atomic_t *)&page->private);
+ if (!(v & ~Z_EROFS_PAGE_EIO)) {
+ set_page_private(page, 0);
+ ClearPagePrivate(page);
+ if (!(v & Z_EROFS_PAGE_EIO))
+ SetPageUptodate(page);
+ unlock_page(page);
+ }
+}
+
+#define Z_EROFS_ONSTACK_PAGES 32
+
/*
* since pclustersize is variable for big pcluster feature, introduce slab
* pools implementation for different pcluster sizes.
@@ -175,35 +340,130 @@ static void z_erofs_free_pcluster(struct z_erofs_pcluster *pcl)
DBG_BUGON(1);
}
-/*
- * tagged pointer with 1-bit tag for all compressed pages
- * tag 0 - the page is just found with an extra page reference
- */
-typedef tagptr1_t compressed_page_t;
+static struct workqueue_struct *z_erofs_workqueue __read_mostly;
-#define tag_compressed_page_justfound(page) \
- tagptr_fold(compressed_page_t, page, 1)
+#ifdef CONFIG_EROFS_FS_PCPU_KTHREAD
+static struct kthread_worker __rcu **z_erofs_pcpu_workers;
-static struct workqueue_struct *z_erofs_workqueue __read_mostly;
+static void erofs_destroy_percpu_workers(void)
+{
+ struct kthread_worker *worker;
+ unsigned int cpu;
+
+ for_each_possible_cpu(cpu) {
+ worker = rcu_dereference_protected(
+ z_erofs_pcpu_workers[cpu], 1);
+ rcu_assign_pointer(z_erofs_pcpu_workers[cpu], NULL);
+ if (worker)
+ kthread_destroy_worker(worker);
+ }
+ kfree(z_erofs_pcpu_workers);
+}
-void z_erofs_exit_zip_subsystem(void)
+static struct kthread_worker *erofs_init_percpu_worker(int cpu)
{
- destroy_workqueue(z_erofs_workqueue);
- z_erofs_destroy_pcluster_pool();
+ struct kthread_worker *worker =
+ kthread_create_worker_on_cpu(cpu, 0, "erofs_worker/%u", cpu);
+
+ if (IS_ERR(worker))
+ return worker;
+ if (IS_ENABLED(CONFIG_EROFS_FS_PCPU_KTHREAD_HIPRI))
+ sched_set_fifo_low(worker->task);
+ else
+ sched_set_normal(worker->task, 0);
+ return worker;
}
-static inline int z_erofs_init_workqueue(void)
+static int erofs_init_percpu_workers(void)
{
- const unsigned int onlinecpus = num_possible_cpus();
+ struct kthread_worker *worker;
+ unsigned int cpu;
- /*
- * no need to spawn too many threads, limiting threads could minimum
- * scheduling overhead, perhaps per-CPU threads should be better?
- */
- z_erofs_workqueue = alloc_workqueue("erofs_unzipd",
- WQ_UNBOUND | WQ_HIGHPRI,
- onlinecpus + onlinecpus / 4);
- return z_erofs_workqueue ? 0 : -ENOMEM;
+ z_erofs_pcpu_workers = kcalloc(num_possible_cpus(),
+ sizeof(struct kthread_worker *), GFP_ATOMIC);
+ if (!z_erofs_pcpu_workers)
+ return -ENOMEM;
+
+ for_each_online_cpu(cpu) { /* could miss cpu{off,on}line? */
+ worker = erofs_init_percpu_worker(cpu);
+ if (!IS_ERR(worker))
+ rcu_assign_pointer(z_erofs_pcpu_workers[cpu], worker);
+ }
+ return 0;
+}
+#else
+static inline void erofs_destroy_percpu_workers(void) {}
+static inline int erofs_init_percpu_workers(void) { return 0; }
+#endif
+
+#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_EROFS_FS_PCPU_KTHREAD)
+static DEFINE_SPINLOCK(z_erofs_pcpu_worker_lock);
+static enum cpuhp_state erofs_cpuhp_state;
+
+static int erofs_cpu_online(unsigned int cpu)
+{
+ struct kthread_worker *worker, *old;
+
+ worker = erofs_init_percpu_worker(cpu);
+ if (IS_ERR(worker))
+ return PTR_ERR(worker);
+
+ spin_lock(&z_erofs_pcpu_worker_lock);
+ old = rcu_dereference_protected(z_erofs_pcpu_workers[cpu],
+ lockdep_is_held(&z_erofs_pcpu_worker_lock));
+ if (!old)
+ rcu_assign_pointer(z_erofs_pcpu_workers[cpu], worker);
+ spin_unlock(&z_erofs_pcpu_worker_lock);
+ if (old)
+ kthread_destroy_worker(worker);
+ return 0;
+}
+
+static int erofs_cpu_offline(unsigned int cpu)
+{
+ struct kthread_worker *worker;
+
+ spin_lock(&z_erofs_pcpu_worker_lock);
+ worker = rcu_dereference_protected(z_erofs_pcpu_workers[cpu],
+ lockdep_is_held(&z_erofs_pcpu_worker_lock));
+ rcu_assign_pointer(z_erofs_pcpu_workers[cpu], NULL);
+ spin_unlock(&z_erofs_pcpu_worker_lock);
+
+ synchronize_rcu();
+ if (worker)
+ kthread_destroy_worker(worker);
+ return 0;
+}
+
+static int erofs_cpu_hotplug_init(void)
+{
+ int state;
+
+ state = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+ "fs/erofs:online", erofs_cpu_online, erofs_cpu_offline);
+ if (state < 0)
+ return state;
+
+ erofs_cpuhp_state = state;
+ return 0;
+}
+
+static void erofs_cpu_hotplug_destroy(void)
+{
+ if (erofs_cpuhp_state)
+ cpuhp_remove_state_nocalls(erofs_cpuhp_state);
+}
+#else /* !CONFIG_HOTPLUG_CPU || !CONFIG_EROFS_FS_PCPU_KTHREAD */
+static inline int erofs_cpu_hotplug_init(void) { return 0; }
+static inline void erofs_cpu_hotplug_destroy(void) {}
+#endif
+
+void z_erofs_exit_zip_subsystem(void)
+{
+ erofs_cpu_hotplug_destroy();
+ erofs_destroy_percpu_workers();
+ destroy_workqueue(z_erofs_workqueue);
+ z_erofs_destroy_pcluster_pool();
}
int __init z_erofs_init_zip_subsystem(void)
@@ -211,10 +471,31 @@ int __init z_erofs_init_zip_subsystem(void)
int err = z_erofs_create_pcluster_pool();
if (err)
- return err;
- err = z_erofs_init_workqueue();
+ goto out_error_pcluster_pool;
+
+ z_erofs_workqueue = alloc_workqueue("erofs_worker",
+ WQ_UNBOUND | WQ_HIGHPRI, num_possible_cpus());
+ if (!z_erofs_workqueue) {
+ err = -ENOMEM;
+ goto out_error_workqueue_init;
+ }
+
+ err = erofs_init_percpu_workers();
if (err)
- z_erofs_destroy_pcluster_pool();
+ goto out_error_pcpu_worker;
+
+ err = erofs_cpu_hotplug_init();
+ if (err < 0)
+ goto out_error_cpuhp_init;
+ return err;
+
+out_error_cpuhp_init:
+ erofs_destroy_percpu_workers();
+out_error_pcpu_worker:
+ destroy_workqueue(z_erofs_workqueue);
+out_error_workqueue_init:
+ z_erofs_destroy_pcluster_pool();
+out_error_pcluster_pool:
return err;
}
@@ -319,7 +600,7 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe,
for (i = 0; i < pcl->pclusterpages; ++i) {
struct page *page;
- compressed_page_t t;
+ void *t; /* mark pages just found for debugging */
struct page *newpage = NULL;
/* the compressed page was loaded before */
@@ -329,7 +610,7 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe,
page = find_get_page(mc, pcl->obj.index + i);
if (page) {
- t = tag_compressed_page_justfound(page);
+ t = (void *)((unsigned long)page | 1);
} else {
/* I/O is needed, no possible to decompress directly */
standalone = false;
@@ -345,11 +626,10 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe,
if (!newpage)
continue;
set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE);
- t = tag_compressed_page_justfound(newpage);
+ t = (void *)((unsigned long)newpage | 1);
}
- if (!cmpxchg_relaxed(&pcl->compressed_bvecs[i].page, NULL,
- tagptr_cast_ptr(t)))
+ if (!cmpxchg_relaxed(&pcl->compressed_bvecs[i].page, NULL, t))
continue;
if (page)
@@ -1032,12 +1312,12 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
if (!be->decompressed_pages)
be->decompressed_pages =
- kvcalloc(be->nr_pages, sizeof(struct page *),
- GFP_KERNEL | __GFP_NOFAIL);
+ kcalloc(be->nr_pages, sizeof(struct page *),
+ GFP_KERNEL | __GFP_NOFAIL);
if (!be->compressed_pages)
be->compressed_pages =
- kvcalloc(pclusterpages, sizeof(struct page *),
- GFP_KERNEL | __GFP_NOFAIL);
+ kcalloc(pclusterpages, sizeof(struct page *),
+ GFP_KERNEL | __GFP_NOFAIL);
z_erofs_parse_out_bvecs(be);
err2 = z_erofs_parse_in_bvecs(be, &overlapped);
@@ -1085,7 +1365,7 @@ out:
}
if (be->compressed_pages < be->onstack_pages ||
be->compressed_pages >= be->onstack_pages + Z_EROFS_ONSTACK_PAGES)
- kvfree(be->compressed_pages);
+ kfree(be->compressed_pages);
z_erofs_fill_other_copies(be, err);
for (i = 0; i < be->nr_pages; ++i) {
@@ -1104,7 +1384,7 @@ out:
}
if (be->decompressed_pages != be->onstack_pages)
- kvfree(be->decompressed_pages);
+ kfree(be->decompressed_pages);
pcl->length = 0;
pcl->partial = true;
@@ -1151,18 +1431,24 @@ static void z_erofs_decompressqueue_work(struct work_struct *work)
DBG_BUGON(bgq->head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
z_erofs_decompress_queue(bgq, &pagepool);
-
erofs_release_pages(&pagepool);
kvfree(bgq);
}
+#ifdef CONFIG_EROFS_FS_PCPU_KTHREAD
+static void z_erofs_decompressqueue_kthread_work(struct kthread_work *work)
+{
+ z_erofs_decompressqueue_work((struct work_struct *)work);
+}
+#endif
+
static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
- bool sync, int bios)
+ int bios)
{
struct erofs_sb_info *const sbi = EROFS_SB(io->sb);
/* wake up the caller thread for sync decompression */
- if (sync) {
+ if (io->sync) {
if (!atomic_add_return(bios, &io->pending_bios))
complete(&io->u.done);
return;
@@ -1170,9 +1456,24 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
if (atomic_add_return(bios, &io->pending_bios))
return;
- /* Use workqueue and sync decompression for atomic contexts only */
+ /* Use (kthread_)work and sync decompression for atomic contexts only */
if (in_atomic() || irqs_disabled()) {
+#ifdef CONFIG_EROFS_FS_PCPU_KTHREAD
+ struct kthread_worker *worker;
+
+ rcu_read_lock();
+ worker = rcu_dereference(
+ z_erofs_pcpu_workers[raw_smp_processor_id()]);
+ if (!worker) {
+ INIT_WORK(&io->u.work, z_erofs_decompressqueue_work);
+ queue_work(z_erofs_workqueue, &io->u.work);
+ } else {
+ kthread_queue_work(worker, &io->u.kthread_work);
+ }
+ rcu_read_unlock();
+#else
queue_work(z_erofs_workqueue, &io->u.work);
+#endif
/* enable sync decompression for readahead */
if (sbi->opt.sync_decompress == EROFS_SYNC_DECOMPRESS_AUTO)
sbi->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_FORCE_ON;
@@ -1192,8 +1493,6 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
struct address_space *mapping;
struct page *oldpage, *page;
-
- compressed_page_t t;
int justfound;
repeat:
@@ -1203,10 +1502,8 @@ repeat:
if (!page)
goto out_allocpage;
- /* process the target tagged pointer */
- t = tagptr_init(compressed_page_t, page);
- justfound = tagptr_unfold_tags(t);
- page = tagptr_unfold_ptr(t);
+ justfound = (unsigned long)page & 1UL;
+ page = (struct page *)((unsigned long)page & ~1UL);
/*
* preallocated cached pages, which is used to avoid direct reclaim
@@ -1294,9 +1591,8 @@ out: /* the only exit (for tracing and debugging) */
return page;
}
-static struct z_erofs_decompressqueue *
-jobqueue_init(struct super_block *sb,
- struct z_erofs_decompressqueue *fgq, bool *fg)
+static struct z_erofs_decompressqueue *jobqueue_init(struct super_block *sb,
+ struct z_erofs_decompressqueue *fgq, bool *fg)
{
struct z_erofs_decompressqueue *q;
@@ -1306,13 +1602,19 @@ jobqueue_init(struct super_block *sb,
*fg = true;
goto fg_out;
}
+#ifdef CONFIG_EROFS_FS_PCPU_KTHREAD
+ kthread_init_work(&q->u.kthread_work,
+ z_erofs_decompressqueue_kthread_work);
+#else
INIT_WORK(&q->u.work, z_erofs_decompressqueue_work);
+#endif
} else {
fg_out:
q = fgq;
init_completion(&fgq->u.done);
atomic_set(&fgq->pending_bios, 0);
q->eio = false;
+ q->sync = true;
}
q->sb = sb;
q->head = Z_EROFS_PCLUSTER_TAIL_CLOSED;
@@ -1326,20 +1628,6 @@ enum {
NR_JOBQUEUES,
};
-static void *jobqueueset_init(struct super_block *sb,
- struct z_erofs_decompressqueue *q[],
- struct z_erofs_decompressqueue *fgq, bool *fg)
-{
- /*
- * if managed cache is enabled, bypass jobqueue is needed,
- * no need to read from device for all pclusters in this queue.
- */
- q[JQ_BYPASS] = jobqueue_init(sb, fgq + JQ_BYPASS, NULL);
- q[JQ_SUBMIT] = jobqueue_init(sb, fgq + JQ_SUBMIT, fg);
-
- return tagptr_cast_ptr(tagptr_fold(tagptr1_t, q[JQ_SUBMIT], *fg));
-}
-
static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl,
z_erofs_next_pcluster_t qtail[],
z_erofs_next_pcluster_t owned_head)
@@ -1361,8 +1649,7 @@ static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl,
static void z_erofs_decompressqueue_endio(struct bio *bio)
{
- tagptr1_t t = tagptr_init(tagptr1_t, bio->bi_private);
- struct z_erofs_decompressqueue *q = tagptr_unfold_ptr(t);
+ struct z_erofs_decompressqueue *q = bio->bi_private;
blk_status_t err = bio->bi_status;
struct bio_vec *bvec;
struct bvec_iter_all iter_all;
@@ -1381,7 +1668,7 @@ static void z_erofs_decompressqueue_endio(struct bio *bio)
}
if (err)
q->eio = true;
- z_erofs_decompress_kickoff(q, tagptr_unfold_tags(t), -1);
+ z_erofs_decompress_kickoff(q, -1);
bio_put(bio);
}
@@ -1394,7 +1681,6 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
struct address_space *mc = MNGD_MAPPING(EROFS_SB(sb));
z_erofs_next_pcluster_t qtail[NR_JOBQUEUES];
struct z_erofs_decompressqueue *q[NR_JOBQUEUES];
- void *bi_private;
z_erofs_next_pcluster_t owned_head = f->owned_head;
/* bio is NULL initially, so no need to initialize last_{index,bdev} */
pgoff_t last_index;
@@ -1404,7 +1690,13 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
unsigned long pflags;
int memstall = 0;
- bi_private = jobqueueset_init(sb, q, fgq, force_fg);
+ /*
+ * if managed cache is enabled, bypass jobqueue is needed,
+ * no need to read from device for all pclusters in this queue.
+ */
+ q[JQ_BYPASS] = jobqueue_init(sb, fgq + JQ_BYPASS, NULL);
+ q[JQ_SUBMIT] = jobqueue_init(sb, fgq + JQ_SUBMIT, force_fg);
+
qtail[JQ_BYPASS] = &q[JQ_BYPASS]->head;
qtail[JQ_SUBMIT] = &q[JQ_SUBMIT]->head;
@@ -1473,7 +1765,7 @@ submit_bio_retry:
last_bdev = mdev.m_bdev;
bio->bi_iter.bi_sector = (sector_t)cur <<
LOG_SECTORS_PER_BLOCK;
- bio->bi_private = bi_private;
+ bio->bi_private = q[JQ_SUBMIT];
if (f->readahead)
bio->bi_opf |= REQ_RAHEAD;
++nr_bios;
@@ -1500,13 +1792,13 @@ submit_bio_retry:
/*
* although background is preferred, no one is pending for submission.
- * don't issue workqueue for decompression but drop it directly instead.
+ * don't issue decompression but drop it directly instead.
*/
if (!*force_fg && !nr_bios) {
kvfree(q[JQ_SUBMIT]);
return;
}
- z_erofs_decompress_kickoff(q[JQ_SUBMIT], *force_fg, nr_bios);
+ z_erofs_decompress_kickoff(q[JQ_SUBMIT], nr_bios);
}
static void z_erofs_runqueue(struct z_erofs_decompress_frontend *f,
diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h
deleted file mode 100644
index d98c95212985..000000000000
--- a/fs/erofs/zdata.h
+++ /dev/null
@@ -1,178 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2018 HUAWEI, Inc.
- * https://www.huawei.com/
- */
-#ifndef __EROFS_FS_ZDATA_H
-#define __EROFS_FS_ZDATA_H
-
-#include "internal.h"
-#include "tagptr.h"
-
-#define Z_EROFS_PCLUSTER_MAX_PAGES (Z_EROFS_PCLUSTER_MAX_SIZE / PAGE_SIZE)
-#define Z_EROFS_INLINE_BVECS 2
-
-/*
- * let's leave a type here in case of introducing
- * another tagged pointer later.
- */
-typedef void *z_erofs_next_pcluster_t;
-
-struct z_erofs_bvec {
- struct page *page;
- int offset;
- unsigned int end;
-};
-
-#define __Z_EROFS_BVSET(name, total) \
-struct name { \
- /* point to the next page which contains the following bvecs */ \
- struct page *nextpage; \
- struct z_erofs_bvec bvec[total]; \
-}
-__Z_EROFS_BVSET(z_erofs_bvset,);
-__Z_EROFS_BVSET(z_erofs_bvset_inline, Z_EROFS_INLINE_BVECS);
-
-/*
- * Structure fields follow one of the following exclusion rules.
- *
- * I: Modifiable by initialization/destruction paths and read-only
- * for everyone else;
- *
- * L: Field should be protected by the pcluster lock;
- *
- * A: Field should be accessed / updated in atomic for parallelized code.
- */
-struct z_erofs_pcluster {
- struct erofs_workgroup obj;
- struct mutex lock;
-
- /* A: point to next chained pcluster or TAILs */
- z_erofs_next_pcluster_t next;
-
- /* L: the maximum decompression size of this round */
- unsigned int length;
-
- /* L: total number of bvecs */
- unsigned int vcnt;
-
- /* I: page offset of start position of decompression */
- unsigned short pageofs_out;
-
- /* I: page offset of inline compressed data */
- unsigned short pageofs_in;
-
- union {
- /* L: inline a certain number of bvec for bootstrap */
- struct z_erofs_bvset_inline bvset;
-
- /* I: can be used to free the pcluster by RCU. */
- struct rcu_head rcu;
- };
-
- union {
- /* I: physical cluster size in pages */
- unsigned short pclusterpages;
-
- /* I: tailpacking inline compressed size */
- unsigned short tailpacking_size;
- };
-
- /* I: compression algorithm format */
- unsigned char algorithmformat;
-
- /* L: whether partial decompression or not */
- bool partial;
-
- /* L: indicate several pageofs_outs or not */
- bool multibases;
-
- /* A: compressed bvecs (can be cached or inplaced pages) */
- struct z_erofs_bvec compressed_bvecs[];
-};
-
-/* let's avoid the valid 32-bit kernel addresses */
-
-/* the chained workgroup has't submitted io (still open) */
-#define Z_EROFS_PCLUSTER_TAIL ((void *)0x5F0ECAFE)
-/* the chained workgroup has already submitted io */
-#define Z_EROFS_PCLUSTER_TAIL_CLOSED ((void *)0x5F0EDEAD)
-
-#define Z_EROFS_PCLUSTER_NIL (NULL)
-
-struct z_erofs_decompressqueue {
- struct super_block *sb;
- atomic_t pending_bios;
- z_erofs_next_pcluster_t head;
-
- union {
- struct completion done;
- struct work_struct work;
- } u;
-
- bool eio;
-};
-
-static inline bool z_erofs_is_inline_pcluster(struct z_erofs_pcluster *pcl)
-{
- return !pcl->obj.index;
-}
-
-static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl)
-{
- if (z_erofs_is_inline_pcluster(pcl))
- return 1;
- return pcl->pclusterpages;
-}
-
-/*
- * bit 30: I/O error occurred on this page
- * bit 0 - 29: remaining parts to complete this page
- */
-#define Z_EROFS_PAGE_EIO (1 << 30)
-
-static inline void z_erofs_onlinepage_init(struct page *page)
-{
- union {
- atomic_t o;
- unsigned long v;
- } u = { .o = ATOMIC_INIT(1) };
-
- set_page_private(page, u.v);
- smp_wmb();
- SetPagePrivate(page);
-}
-
-static inline void z_erofs_onlinepage_split(struct page *page)
-{
- atomic_inc((atomic_t *)&page->private);
-}
-
-static inline void z_erofs_page_mark_eio(struct page *page)
-{
- int orig;
-
- do {
- orig = atomic_read((atomic_t *)&page->private);
- } while (atomic_cmpxchg((atomic_t *)&page->private, orig,
- orig | Z_EROFS_PAGE_EIO) != orig);
-}
-
-static inline void z_erofs_onlinepage_endio(struct page *page)
-{
- unsigned int v;
-
- DBG_BUGON(!PagePrivate(page));
- v = atomic_dec_return((atomic_t *)&page->private);
- if (!(v & ~Z_EROFS_PAGE_EIO)) {
- set_page_private(page, 0);
- ClearPagePrivate(page);
- if (!(v & Z_EROFS_PAGE_EIO))
- SetPageUptodate(page);
- unlock_page(page);
- }
-}
-
-#define Z_EROFS_ONSTACK_PAGES 32
-
-#endif
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index 0150570c33aa..8bf6d30518b6 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -7,10 +7,6 @@
#include <asm/unaligned.h>
#include <trace/events/erofs.h>
-static int z_erofs_do_map_blocks(struct inode *inode,
- struct erofs_map_blocks *map,
- int flags);
-
int z_erofs_fill_inode(struct inode *inode)
{
struct erofs_inode *const vi = EROFS_I(inode);
@@ -29,126 +25,6 @@ int z_erofs_fill_inode(struct inode *inode)
return 0;
}
-static int z_erofs_fill_inode_lazy(struct inode *inode)
-{
- struct erofs_inode *const vi = EROFS_I(inode);
- struct super_block *const sb = inode->i_sb;
- int err, headnr;
- erofs_off_t pos;
- struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
- void *kaddr;
- struct z_erofs_map_header *h;
-
- if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) {
- /*
- * paired with smp_mb() at the end of the function to ensure
- * fields will only be observed after the bit is set.
- */
- smp_mb();
- return 0;
- }
-
- if (wait_on_bit_lock(&vi->flags, EROFS_I_BL_Z_BIT, TASK_KILLABLE))
- return -ERESTARTSYS;
-
- err = 0;
- if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags))
- goto out_unlock;
-
- pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize +
- vi->xattr_isize, 8);
- kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos), EROFS_KMAP);
- if (IS_ERR(kaddr)) {
- err = PTR_ERR(kaddr);
- goto out_unlock;
- }
-
- h = kaddr + erofs_blkoff(pos);
- /*
- * if the highest bit of the 8-byte map header is set, the whole file
- * is stored in the packed inode. The rest bits keeps z_fragmentoff.
- */
- if (h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT) {
- vi->z_advise = Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
- vi->z_fragmentoff = le64_to_cpu(*(__le64 *)h) ^ (1ULL << 63);
- vi->z_tailextent_headlcn = 0;
- goto done;
- }
- vi->z_advise = le16_to_cpu(h->h_advise);
- vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
- vi->z_algorithmtype[1] = h->h_algorithmtype >> 4;
-
- headnr = 0;
- if (vi->z_algorithmtype[0] >= Z_EROFS_COMPRESSION_MAX ||
- vi->z_algorithmtype[++headnr] >= Z_EROFS_COMPRESSION_MAX) {
- erofs_err(sb, "unknown HEAD%u format %u for nid %llu, please upgrade kernel",
- headnr + 1, vi->z_algorithmtype[headnr], vi->nid);
- err = -EOPNOTSUPP;
- goto out_put_metabuf;
- }
-
- vi->z_logical_clusterbits = LOG_BLOCK_SIZE + (h->h_clusterbits & 7);
- if (!erofs_sb_has_big_pcluster(EROFS_SB(sb)) &&
- vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 |
- Z_EROFS_ADVISE_BIG_PCLUSTER_2)) {
- erofs_err(sb, "per-inode big pcluster without sb feature for nid %llu",
- vi->nid);
- err = -EFSCORRUPTED;
- goto out_put_metabuf;
- }
- if (vi->datalayout == EROFS_INODE_FLAT_COMPRESSION &&
- !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1) ^
- !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2)) {
- erofs_err(sb, "big pcluster head1/2 of compact indexes should be consistent for nid %llu",
- vi->nid);
- err = -EFSCORRUPTED;
- goto out_put_metabuf;
- }
-
- if (vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER) {
- struct erofs_map_blocks map = {
- .buf = __EROFS_BUF_INITIALIZER
- };
-
- vi->z_idata_size = le16_to_cpu(h->h_idata_size);
- err = z_erofs_do_map_blocks(inode, &map,
- EROFS_GET_BLOCKS_FINDTAIL);
- erofs_put_metabuf(&map.buf);
-
- if (!map.m_plen ||
- erofs_blkoff(map.m_pa) + map.m_plen > EROFS_BLKSIZ) {
- erofs_err(sb, "invalid tail-packing pclustersize %llu",
- map.m_plen);
- err = -EFSCORRUPTED;
- }
- if (err < 0)
- goto out_put_metabuf;
- }
-
- if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER &&
- !(h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT)) {
- struct erofs_map_blocks map = {
- .buf = __EROFS_BUF_INITIALIZER
- };
-
- vi->z_fragmentoff = le32_to_cpu(h->h_fragmentoff);
- err = z_erofs_do_map_blocks(inode, &map,
- EROFS_GET_BLOCKS_FINDTAIL);
- erofs_put_metabuf(&map.buf);
- if (err < 0)
- goto out_put_metabuf;
- }
-done:
- /* paired with smp_mb() at the beginning of the function */
- smp_mb();
- set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
-out_put_metabuf:
- erofs_put_metabuf(&buf);
-out_unlock:
- clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags);
- return err;
-}
-
struct z_erofs_maprecorder {
struct inode *inode;
struct erofs_map_blocks *map;
@@ -169,10 +45,9 @@ static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
{
struct inode *const inode = m->inode;
struct erofs_inode *const vi = EROFS_I(inode);
- const erofs_off_t ibase = iloc(EROFS_I_SB(inode), vi->nid);
const erofs_off_t pos =
- Z_EROFS_VLE_LEGACY_INDEX_ALIGN(ibase + vi->inode_isize +
- vi->xattr_isize) +
+ Z_EROFS_VLE_LEGACY_INDEX_ALIGN(erofs_iloc(inode) +
+ vi->inode_isize + vi->xattr_isize) +
lcn * sizeof(struct z_erofs_vle_decompressed_index);
struct z_erofs_vle_decompressed_index *di;
unsigned int advise, type;
@@ -372,9 +247,8 @@ static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m,
struct inode *const inode = m->inode;
struct erofs_inode *const vi = EROFS_I(inode);
const unsigned int lclusterbits = vi->z_logical_clusterbits;
- const erofs_off_t ebase = ALIGN(iloc(EROFS_I_SB(inode), vi->nid) +
- vi->inode_isize + vi->xattr_isize, 8) +
- sizeof(struct z_erofs_map_header);
+ const erofs_off_t ebase = sizeof(struct z_erofs_map_header) +
+ ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8);
const unsigned int totalidx = DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
unsigned int compacted_4b_initial, compacted_2b;
unsigned int amortizedshift;
@@ -732,6 +606,125 @@ unmap_out:
return err;
}
+static int z_erofs_fill_inode_lazy(struct inode *inode)
+{
+ struct erofs_inode *const vi = EROFS_I(inode);
+ struct super_block *const sb = inode->i_sb;
+ int err, headnr;
+ erofs_off_t pos;
+ struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
+ void *kaddr;
+ struct z_erofs_map_header *h;
+
+ if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) {
+ /*
+ * paired with smp_mb() at the end of the function to ensure
+ * fields will only be observed after the bit is set.
+ */
+ smp_mb();
+ return 0;
+ }
+
+ if (wait_on_bit_lock(&vi->flags, EROFS_I_BL_Z_BIT, TASK_KILLABLE))
+ return -ERESTARTSYS;
+
+ err = 0;
+ if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags))
+ goto out_unlock;
+
+ pos = ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8);
+ kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos), EROFS_KMAP);
+ if (IS_ERR(kaddr)) {
+ err = PTR_ERR(kaddr);
+ goto out_unlock;
+ }
+
+ h = kaddr + erofs_blkoff(pos);
+ /*
+ * if the highest bit of the 8-byte map header is set, the whole file
+ * is stored in the packed inode. The rest bits keeps z_fragmentoff.
+ */
+ if (h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT) {
+ vi->z_advise = Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
+ vi->z_fragmentoff = le64_to_cpu(*(__le64 *)h) ^ (1ULL << 63);
+ vi->z_tailextent_headlcn = 0;
+ goto done;
+ }
+ vi->z_advise = le16_to_cpu(h->h_advise);
+ vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
+ vi->z_algorithmtype[1] = h->h_algorithmtype >> 4;
+
+ headnr = 0;
+ if (vi->z_algorithmtype[0] >= Z_EROFS_COMPRESSION_MAX ||
+ vi->z_algorithmtype[++headnr] >= Z_EROFS_COMPRESSION_MAX) {
+ erofs_err(sb, "unknown HEAD%u format %u for nid %llu, please upgrade kernel",
+ headnr + 1, vi->z_algorithmtype[headnr], vi->nid);
+ err = -EOPNOTSUPP;
+ goto out_put_metabuf;
+ }
+
+ vi->z_logical_clusterbits = LOG_BLOCK_SIZE + (h->h_clusterbits & 7);
+ if (!erofs_sb_has_big_pcluster(EROFS_SB(sb)) &&
+ vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 |
+ Z_EROFS_ADVISE_BIG_PCLUSTER_2)) {
+ erofs_err(sb, "per-inode big pcluster without sb feature for nid %llu",
+ vi->nid);
+ err = -EFSCORRUPTED;
+ goto out_put_metabuf;
+ }
+ if (vi->datalayout == EROFS_INODE_FLAT_COMPRESSION &&
+ !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1) ^
+ !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2)) {
+ erofs_err(sb, "big pcluster head1/2 of compact indexes should be consistent for nid %llu",
+ vi->nid);
+ err = -EFSCORRUPTED;
+ goto out_put_metabuf;
+ }
+
+ if (vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER) {
+ struct erofs_map_blocks map = {
+ .buf = __EROFS_BUF_INITIALIZER
+ };
+
+ vi->z_idata_size = le16_to_cpu(h->h_idata_size);
+ err = z_erofs_do_map_blocks(inode, &map,
+ EROFS_GET_BLOCKS_FINDTAIL);
+ erofs_put_metabuf(&map.buf);
+
+ if (!map.m_plen ||
+ erofs_blkoff(map.m_pa) + map.m_plen > EROFS_BLKSIZ) {
+ erofs_err(sb, "invalid tail-packing pclustersize %llu",
+ map.m_plen);
+ err = -EFSCORRUPTED;
+ }
+ if (err < 0)
+ goto out_put_metabuf;
+ }
+
+ if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER &&
+ !(h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT)) {
+ struct erofs_map_blocks map = {
+ .buf = __EROFS_BUF_INITIALIZER
+ };
+
+ vi->z_fragmentoff = le32_to_cpu(h->h_fragmentoff);
+ err = z_erofs_do_map_blocks(inode, &map,
+ EROFS_GET_BLOCKS_FINDTAIL);
+ erofs_put_metabuf(&map.buf);
+ if (err < 0)
+ goto out_put_metabuf;
+ }
+done:
+ /* paired with smp_mb() at the beginning of the function */
+ smp_mb();
+ set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
+out_put_metabuf:
+ erofs_put_metabuf(&buf);
+out_unlock:
+ clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags);
+ return err;
+}
+
int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map,
int flags)
{
@@ -793,12 +786,16 @@ static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset,
iomap->type = IOMAP_HOLE;
iomap->addr = IOMAP_NULL_ADDR;
/*
- * No strict rule how to describe extents for post EOF, yet
- * we need do like below. Otherwise, iomap itself will get
+ * No strict rule on how to describe extents for post EOF, yet
+ * we need to do like below. Otherwise, iomap itself will get
* into an endless loop on post EOF.
+ *
+ * Calculate the effective offset by subtracting extent start
+ * (map.m_la) from the requested offset, and add it to length.
+ * (NB: offset >= map.m_la always)
*/
if (iomap->offset >= inode->i_size)
- iomap->length = length + map.m_la - offset;
+ iomap->length = length + offset - map.m_la;
}
iomap->flags = 0;
return 0;
diff --git a/fs/exec.c b/fs/exec.c
index ab913243a367..3d2b80d8d58e 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1414,15 +1414,15 @@ EXPORT_SYMBOL(begin_new_exec);
void would_dump(struct linux_binprm *bprm, struct file *file)
{
struct inode *inode = file_inode(file);
- struct user_namespace *mnt_userns = file_mnt_user_ns(file);
- if (inode_permission(mnt_userns, inode, MAY_READ) < 0) {
+ struct mnt_idmap *idmap = file_mnt_idmap(file);
+ if (inode_permission(idmap, inode, MAY_READ) < 0) {
struct user_namespace *old, *user_ns;
bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
/* Ensure mm->user_ns contains the executable */
user_ns = old = bprm->mm->user_ns;
while ((user_ns != &init_user_ns) &&
- !privileged_wrt_inode_uidgid(user_ns, mnt_userns, inode))
+ !privileged_wrt_inode_uidgid(user_ns, idmap, inode))
user_ns = user_ns->parent;
if (old != user_ns) {
@@ -1596,7 +1596,7 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file)
{
/* Handle suid and sgid on files */
- struct user_namespace *mnt_userns;
+ struct mnt_idmap *idmap;
struct inode *inode = file_inode(file);
unsigned int mode;
vfsuid_t vfsuid;
@@ -1612,15 +1612,15 @@ static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file)
if (!(mode & (S_ISUID|S_ISGID)))
return;
- mnt_userns = file_mnt_user_ns(file);
+ idmap = file_mnt_idmap(file);
/* Be careful if suid/sgid is set */
inode_lock(inode);
/* reload atomically mode/uid/gid now that lock held */
mode = inode->i_mode;
- vfsuid = i_uid_into_vfsuid(mnt_userns, inode);
- vfsgid = i_gid_into_vfsgid(mnt_userns, inode);
+ vfsuid = i_uid_into_vfsuid(idmap, inode);
+ vfsgid = i_gid_into_vfsgid(idmap, inode);
inode_unlock(inode);
/* We ignore suid/sgid if there are no mappings for them in the ns */
diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index bc6d21d7c5ad..1bf16abe3c84 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -450,9 +450,9 @@ int exfat_trim_fs(struct inode *inode, struct fstrim_range *range);
extern const struct file_operations exfat_file_operations;
int __exfat_truncate(struct inode *inode);
void exfat_truncate(struct inode *inode);
-int exfat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int exfat_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr);
-int exfat_getattr(struct user_namespace *mnt_userns, const struct path *path,
+int exfat_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, unsigned int request_mask,
unsigned int query_flags);
int exfat_file_fsync(struct file *file, loff_t start, loff_t end, int datasync);
diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index f5b29072775d..1fdb0a64b91d 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -226,14 +226,14 @@ write_size:
mutex_unlock(&sbi->s_lock);
}
-int exfat_getattr(struct user_namespace *mnt_uerns, const struct path *path,
+int exfat_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, unsigned int request_mask,
unsigned int query_flags)
{
struct inode *inode = d_backing_inode(path->dentry);
struct exfat_inode_info *ei = EXFAT_I(inode);
- generic_fillattr(&init_user_ns, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, inode, stat);
exfat_truncate_atime(&stat->atime);
stat->result_mask |= STATX_BTIME;
stat->btime.tv_sec = ei->i_crtime.tv_sec;
@@ -242,7 +242,7 @@ int exfat_getattr(struct user_namespace *mnt_uerns, const struct path *path,
return 0;
}
-int exfat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int exfat_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
struct exfat_sb_info *sbi = EXFAT_SB(dentry->d_sb);
@@ -266,7 +266,7 @@ int exfat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
ATTR_TIMES_SET);
}
- error = setattr_prepare(&init_user_ns, dentry, attr);
+ error = setattr_prepare(&nop_mnt_idmap, dentry, attr);
attr->ia_valid = ia_valid;
if (error)
goto out;
@@ -293,7 +293,7 @@ int exfat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
if (attr->ia_valid & ATTR_SIZE)
inode->i_mtime = inode->i_ctime = current_time(inode);
- setattr_copy(&init_user_ns, inode, attr);
+ setattr_copy(&nop_mnt_idmap, inode, attr);
exfat_truncate_atime(&inode->i_atime);
if (attr->ia_valid & ATTR_SIZE) {
diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index 5f995eba5dbb..02aab4c3a5f7 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -551,7 +551,7 @@ out:
return ret;
}
-static int exfat_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int exfat_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
struct super_block *sb = dir->i_sb;
@@ -834,7 +834,7 @@ unlock:
return err;
}
-static int exfat_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int exfat_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
struct super_block *sb = dir->i_sb;
@@ -1285,7 +1285,7 @@ out:
return ret;
}
-static int exfat_rename(struct user_namespace *mnt_userns,
+static int exfat_rename(struct mnt_idmap *idmap,
struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 3204bd33e4e8..ab88d33d106c 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -145,7 +145,7 @@ static struct dentry *reconnect_one(struct vfsmount *mnt,
if (err)
goto out_err;
dprintk("%s: found name: %s\n", __func__, nbuf);
- tmp = lookup_one_unlocked(mnt_user_ns(mnt), nbuf, parent, strlen(nbuf));
+ tmp = lookup_one_unlocked(mnt_idmap(mnt), nbuf, parent, strlen(nbuf));
if (IS_ERR(tmp)) {
dprintk("lookup failed: %ld\n", PTR_ERR(tmp));
err = PTR_ERR(tmp);
@@ -524,7 +524,7 @@ exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len,
}
inode_lock(target_dir->d_inode);
- nresult = lookup_one(mnt_user_ns(mnt), nbuf,
+ nresult = lookup_one(mnt_idmap(mnt), nbuf,
target_dir, strlen(nbuf));
if (!IS_ERR(nresult)) {
if (unlikely(nresult->d_inode != result->d_inode)) {
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 440d5f1e9d47..82b17d7fc93f 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -219,7 +219,7 @@ __ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
* inode->i_mutex: down
*/
int
-ext2_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+ext2_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type)
{
int error;
@@ -228,7 +228,7 @@ ext2_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
umode_t mode = inode->i_mode;
if (type == ACL_TYPE_ACCESS && acl) {
- error = posix_acl_update_mode(&init_user_ns, inode, &mode,
+ error = posix_acl_update_mode(&nop_mnt_idmap, inode, &mode,
&acl);
if (error)
return error;
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index 3841becb94ff..4a8443a2b8ec 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -56,7 +56,7 @@ static inline int ext2_acl_count(size_t size)
/* acl.c */
extern struct posix_acl *ext2_get_acl(struct inode *inode, int type, bool rcu);
-extern int ext2_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+extern int ext2_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type);
extern int ext2_init_acl (struct inode *, struct inode *);
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index e5cbc27ba459..4a6955a0a116 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -461,9 +461,9 @@ static int ext2_handle_dirsync(struct inode *dir)
return err;
}
-void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
- struct page *page, void *page_addr, struct inode *inode,
- int update_times)
+int ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
+ struct page *page, void *page_addr, struct inode *inode,
+ bool update_times)
{
loff_t pos = page_offset(page) +
(char *) de - (char *) page_addr;
@@ -472,7 +472,10 @@ void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
lock_page(page);
err = ext2_prepare_chunk(page, pos, len);
- BUG_ON(err);
+ if (err) {
+ unlock_page(page);
+ return err;
+ }
de->inode = cpu_to_le32(inode->i_ino);
ext2_set_de_type(de, inode);
ext2_commit_chunk(page, pos, len);
@@ -480,7 +483,7 @@ void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
dir->i_mtime = dir->i_ctime = current_time(dir);
EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL;
mark_inode_dirty(dir);
- ext2_handle_dirsync(dir);
+ return ext2_handle_dirsync(dir);
}
/*
@@ -646,7 +649,7 @@ int ext2_make_empty(struct inode *inode, struct inode *parent)
unlock_page(page);
goto fail;
}
- kaddr = kmap_atomic(page);
+ kaddr = kmap_local_page(page);
memset(kaddr, 0, chunk_size);
de = (struct ext2_dir_entry_2 *)kaddr;
de->name_len = 1;
@@ -661,7 +664,7 @@ int ext2_make_empty(struct inode *inode, struct inode *parent)
de->inode = cpu_to_le32(parent->i_ino);
memcpy (de->name, "..\0", 4);
ext2_set_de_type (de, inode);
- kunmap_atomic(kaddr);
+ kunmap_local(kaddr);
ext2_commit_chunk(page, 0, chunk_size);
err = ext2_handle_dirsync(inode);
fail:
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 28de11a22e5f..cb78d7dcfb95 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -734,8 +734,9 @@ extern int ext2_delete_entry(struct ext2_dir_entry_2 *dir, struct page *page,
char *kaddr);
extern int ext2_empty_dir (struct inode *);
extern struct ext2_dir_entry_2 *ext2_dotdot(struct inode *dir, struct page **p, void **pa);
-extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, void *,
- struct inode *, int);
+int ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
+ struct page *page, void *page_addr, struct inode *inode,
+ bool update_times);
static inline void ext2_put_page(struct page *page, void *page_addr)
{
kunmap_local(page_addr);
@@ -753,8 +754,8 @@ extern struct inode *ext2_iget (struct super_block *, unsigned long);
extern int ext2_write_inode (struct inode *, struct writeback_control *);
extern void ext2_evict_inode(struct inode *);
extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int);
-extern int ext2_setattr (struct user_namespace *, struct dentry *, struct iattr *);
-extern int ext2_getattr (struct user_namespace *, const struct path *,
+extern int ext2_setattr (struct mnt_idmap *, struct dentry *, struct iattr *);
+extern int ext2_getattr (struct mnt_idmap *, const struct path *,
struct kstat *, u32, unsigned int);
extern void ext2_set_inode_flags(struct inode *inode);
extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
@@ -762,7 +763,7 @@ extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
/* ioctl.c */
extern int ext2_fileattr_get(struct dentry *dentry, struct fileattr *fa);
-extern int ext2_fileattr_set(struct user_namespace *mnt_userns,
+extern int ext2_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa);
extern long ext2_ioctl(struct file *, unsigned int, unsigned long);
extern long ext2_compat_ioctl(struct file *, unsigned int, unsigned long);
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 78b8686d9a4a..a4e1d7a9c544 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -545,7 +545,7 @@ got:
inode->i_uid = current_fsuid();
inode->i_gid = dir->i_gid;
} else
- inode_init_owner(&init_user_ns, inode, dir, mode);
+ inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
inode->i_ino = ino;
inode->i_blocks = 0;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 69aed9e2359e..26f135e7ffce 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1592,7 +1592,7 @@ int ext2_write_inode(struct inode *inode, struct writeback_control *wbc)
return __ext2_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
}
-int ext2_getattr(struct user_namespace *mnt_userns, const struct path *path,
+int ext2_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
@@ -1614,28 +1614,28 @@ int ext2_getattr(struct user_namespace *mnt_userns, const struct path *path,
STATX_ATTR_IMMUTABLE |
STATX_ATTR_NODUMP);
- generic_fillattr(&init_user_ns, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, inode, stat);
return 0;
}
-int ext2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int ext2_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *iattr)
{
struct inode *inode = d_inode(dentry);
int error;
- error = setattr_prepare(&init_user_ns, dentry, iattr);
+ error = setattr_prepare(&nop_mnt_idmap, dentry, iattr);
if (error)
return error;
- if (is_quota_modification(mnt_userns, inode, iattr)) {
+ if (is_quota_modification(&nop_mnt_idmap, inode, iattr)) {
error = dquot_initialize(inode);
if (error)
return error;
}
- if (i_uid_needs_update(mnt_userns, iattr, inode) ||
- i_gid_needs_update(mnt_userns, iattr, inode)) {
- error = dquot_transfer(mnt_userns, inode, iattr);
+ if (i_uid_needs_update(&nop_mnt_idmap, iattr, inode) ||
+ i_gid_needs_update(&nop_mnt_idmap, iattr, inode)) {
+ error = dquot_transfer(&nop_mnt_idmap, inode, iattr);
if (error)
return error;
}
@@ -1644,9 +1644,9 @@ int ext2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
if (error)
return error;
}
- setattr_copy(&init_user_ns, inode, iattr);
+ setattr_copy(&nop_mnt_idmap, inode, iattr);
if (iattr->ia_valid & ATTR_MODE)
- error = posix_acl_chmod(&init_user_ns, dentry, inode->i_mode);
+ error = posix_acl_chmod(&nop_mnt_idmap, dentry, inode->i_mode);
mark_inode_dirty(inode);
return error;
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index e8340bf09b10..cc87d413eb43 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -27,7 +27,7 @@ int ext2_fileattr_get(struct dentry *dentry, struct fileattr *fa)
return 0;
}
-int ext2_fileattr_set(struct user_namespace *mnt_userns,
+int ext2_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa)
{
struct inode *inode = d_inode(dentry);
@@ -66,7 +66,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
case EXT2_IOC_SETVERSION: {
__u32 generation;
- if (!inode_owner_or_capable(&init_user_ns, inode))
+ if (!inode_owner_or_capable(&nop_mnt_idmap, inode))
return -EPERM;
ret = mnt_want_write_file(filp);
if (ret)
@@ -99,7 +99,7 @@ setversion_out:
if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
return -ENOTTY;
- if (!inode_owner_or_capable(&init_user_ns, inode))
+ if (!inode_owner_or_capable(&nop_mnt_idmap, inode))
return -EACCES;
if (get_user(rsv_window_size, (int __user *)arg))
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index c056957221a2..7f5dfa87cc95 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -99,7 +99,7 @@ struct dentry *ext2_get_parent(struct dentry *child)
* If the create succeeds, we fill in the inode information
* with d_instantiate().
*/
-static int ext2_create (struct user_namespace * mnt_userns,
+static int ext2_create (struct mnt_idmap * idmap,
struct inode * dir, struct dentry * dentry,
umode_t mode, bool excl)
{
@@ -119,7 +119,7 @@ static int ext2_create (struct user_namespace * mnt_userns,
return ext2_add_nondir(dentry, inode);
}
-static int ext2_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+static int ext2_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
struct file *file, umode_t mode)
{
struct inode *inode = ext2_new_inode(dir, mode, NULL);
@@ -133,7 +133,7 @@ static int ext2_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
return finish_open_simple(file, 0);
}
-static int ext2_mknod (struct user_namespace * mnt_userns, struct inode * dir,
+static int ext2_mknod (struct mnt_idmap * idmap, struct inode * dir,
struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct inode * inode;
@@ -154,7 +154,7 @@ static int ext2_mknod (struct user_namespace * mnt_userns, struct inode * dir,
return err;
}
-static int ext2_symlink (struct user_namespace * mnt_userns, struct inode * dir,
+static int ext2_symlink (struct mnt_idmap * idmap, struct inode * dir,
struct dentry * dentry, const char * symname)
{
struct super_block * sb = dir->i_sb;
@@ -225,7 +225,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
return err;
}
-static int ext2_mkdir(struct user_namespace * mnt_userns,
+static int ext2_mkdir(struct mnt_idmap * idmap,
struct inode * dir, struct dentry * dentry, umode_t mode)
{
struct inode * inode;
@@ -315,7 +315,7 @@ static int ext2_rmdir (struct inode * dir, struct dentry *dentry)
return err;
}
-static int ext2_rename (struct user_namespace * mnt_userns,
+static int ext2_rename (struct mnt_idmap * idmap,
struct inode * old_dir, struct dentry * old_dentry,
struct inode * new_dir, struct dentry * new_dentry,
unsigned int flags)
@@ -370,8 +370,11 @@ static int ext2_rename (struct user_namespace * mnt_userns,
err = PTR_ERR(new_de);
goto out_dir;
}
- ext2_set_link(new_dir, new_de, new_page, page_addr, old_inode, 1);
+ err = ext2_set_link(new_dir, new_de, new_page, page_addr,
+ old_inode, true);
ext2_put_page(new_page, page_addr);
+ if (err)
+ goto out_dir;
new_inode->i_ctime = current_time(new_inode);
if (dir_de)
drop_nlink(new_inode);
@@ -394,24 +397,24 @@ static int ext2_rename (struct user_namespace * mnt_userns,
ext2_delete_entry(old_de, old_page, old_page_addr);
if (dir_de) {
- if (old_dir != new_dir)
- ext2_set_link(old_inode, dir_de, dir_page,
- dir_page_addr, new_dir, 0);
+ if (old_dir != new_dir) {
+ err = ext2_set_link(old_inode, dir_de, dir_page,
+ dir_page_addr, new_dir, false);
+ }
ext2_put_page(dir_page, dir_page_addr);
inode_dec_link_count(old_dir);
}
+out_old:
ext2_put_page(old_page, old_page_addr);
- return 0;
+out:
+ return err;
out_dir:
if (dir_de)
ext2_put_page(dir_page, dir_page_addr);
-out_old:
- ext2_put_page(old_page, old_page_addr);
-out:
- return err;
+ goto out_old;
}
const struct inode_operations ext2_dir_inode_operations = {
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index ebade1f52451..db47b8ab153e 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -19,7 +19,7 @@ ext2_xattr_security_get(const struct xattr_handler *handler,
static int
ext2_xattr_security_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c
index 18a87d5dd1ab..995f931228ce 100644
--- a/fs/ext2/xattr_trusted.c
+++ b/fs/ext2/xattr_trusted.c
@@ -26,7 +26,7 @@ ext2_xattr_trusted_get(const struct xattr_handler *handler,
static int
ext2_xattr_trusted_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c
index 58092449f8ff..dd1507231081 100644
--- a/fs/ext2/xattr_user.c
+++ b/fs/ext2/xattr_user.c
@@ -30,7 +30,7 @@ ext2_xattr_user_get(const struct xattr_handler *handler,
static int
ext2_xattr_user_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index a9f89539aeee..27fcbddfb148 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -225,7 +225,7 @@ __ext4_set_acl(handle_t *handle, struct inode *inode, int type,
}
int
-ext4_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+ext4_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type)
{
handle_t *handle;
@@ -249,7 +249,7 @@ retry:
return PTR_ERR(handle);
if ((type == ACL_TYPE_ACCESS) && acl) {
- error = posix_acl_update_mode(mnt_userns, inode, &mode, &acl);
+ error = posix_acl_update_mode(idmap, inode, &mode, &acl);
if (error)
goto out_stop;
if (mode != inode->i_mode)
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index 09c4a8a3b716..0c5a79c3b5d4 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -56,7 +56,7 @@ static inline int ext4_acl_count(size_t size)
/* acl.c */
struct posix_acl *ext4_get_acl(struct inode *inode, int type, bool rcu);
-int ext4_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+int ext4_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type);
extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 140e1eb300d1..43e26e6f6e42 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2845,7 +2845,7 @@ extern int ext4fs_dirhash(const struct inode *dir, const char *name, int len,
/* ialloc.c */
extern int ext4_mark_inode_used(struct super_block *sb, int ino);
-extern struct inode *__ext4_new_inode(struct user_namespace *, handle_t *,
+extern struct inode *__ext4_new_inode(struct mnt_idmap *, handle_t *,
struct inode *, umode_t,
const struct qstr *qstr, __u32 goal,
uid_t *owner, __u32 i_flags,
@@ -2853,11 +2853,11 @@ extern struct inode *__ext4_new_inode(struct user_namespace *, handle_t *,
int nblocks);
#define ext4_new_inode(handle, dir, mode, qstr, goal, owner, i_flags) \
- __ext4_new_inode(&init_user_ns, (handle), (dir), (mode), (qstr), \
+ __ext4_new_inode(&nop_mnt_idmap, (handle), (dir), (mode), (qstr), \
(goal), (owner), i_flags, 0, 0, 0)
-#define ext4_new_inode_start_handle(mnt_userns, dir, mode, qstr, goal, owner, \
+#define ext4_new_inode_start_handle(idmap, dir, mode, qstr, goal, owner, \
type, nblocks) \
- __ext4_new_inode((mnt_userns), NULL, (dir), (mode), (qstr), (goal), (owner), \
+ __ext4_new_inode((idmap), NULL, (dir), (mode), (qstr), (goal), (owner), \
0, (type), __LINE__, (nblocks))
@@ -2976,14 +2976,14 @@ extern struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
__ext4_iget((sb), (ino), (flags), __func__, __LINE__)
extern int ext4_write_inode(struct inode *, struct writeback_control *);
-extern int ext4_setattr(struct user_namespace *, struct dentry *,
+extern int ext4_setattr(struct mnt_idmap *, struct dentry *,
struct iattr *);
extern u32 ext4_dio_alignment(struct inode *inode);
-extern int ext4_getattr(struct user_namespace *, const struct path *,
+extern int ext4_getattr(struct mnt_idmap *, const struct path *,
struct kstat *, u32, unsigned int);
extern void ext4_evict_inode(struct inode *);
extern void ext4_clear_inode(struct inode *);
-extern int ext4_file_getattr(struct user_namespace *, const struct path *,
+extern int ext4_file_getattr(struct mnt_idmap *, const struct path *,
struct kstat *, u32, unsigned int);
extern int ext4_sync_inode(handle_t *, struct inode *);
extern void ext4_dirty_inode(struct inode *, int);
@@ -3024,7 +3024,7 @@ extern int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
/* ioctl.c */
extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
-int ext4_fileattr_set(struct user_namespace *mnt_userns,
+int ext4_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa);
int ext4_fileattr_get(struct dentry *dentry, struct fileattr *fa);
extern void ext4_reset_inode_seed(struct inode *inode);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 63f9bb6e8851..157663031f8c 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -921,7 +921,7 @@ static int ext4_xattr_credits_for_new_inode(struct inode *dir, mode_t mode,
* For other inodes, search forward from the parent directory's block
* group to find a free inode.
*/
-struct inode *__ext4_new_inode(struct user_namespace *mnt_userns,
+struct inode *__ext4_new_inode(struct mnt_idmap *idmap,
handle_t *handle, struct inode *dir,
umode_t mode, const struct qstr *qstr,
__u32 goal, uid_t *owner, __u32 i_flags,
@@ -972,10 +972,10 @@ struct inode *__ext4_new_inode(struct user_namespace *mnt_userns,
i_gid_write(inode, owner[1]);
} else if (test_opt(sb, GRPID)) {
inode->i_mode = mode;
- inode_fsuid_set(inode, mnt_userns);
+ inode_fsuid_set(inode, idmap);
inode->i_gid = dir->i_gid;
} else
- inode_init_owner(mnt_userns, inode, dir, mode);
+ inode_init_owner(idmap, inode, dir, mode);
if (ext4_has_feature_project(sb) &&
ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT))
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9d9f414f99fe..b936ee3af51e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1136,7 +1136,8 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
for (i = 0; i < nr_wait; i++) {
int err2;
- err2 = fscrypt_decrypt_pagecache_blocks(page, blocksize,
+ err2 = fscrypt_decrypt_pagecache_blocks(page_folio(page),
+ blocksize,
bh_offset(wait[i]));
if (err2) {
clear_buffer_uptodate(wait[i]);
@@ -3858,7 +3859,8 @@ static int __ext4_block_zero_page_range(handle_t *handle,
if (fscrypt_inode_uses_fs_layer_crypto(inode)) {
/* We expect the key to be set. */
BUG_ON(!fscrypt_has_encryption_key(inode));
- err = fscrypt_decrypt_pagecache_blocks(page, blocksize,
+ err = fscrypt_decrypt_pagecache_blocks(page_folio(page),
+ blocksize,
bh_offset(bh));
if (err) {
clear_buffer_uptodate(bh);
@@ -5434,7 +5436,7 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode)
*
* Called with inode->i_rwsem down.
*/
-int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
@@ -5454,7 +5456,7 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
ATTR_GID | ATTR_TIMES_SET))))
return -EPERM;
- error = setattr_prepare(mnt_userns, dentry, attr);
+ error = setattr_prepare(idmap, dentry, attr);
if (error)
return error;
@@ -5466,14 +5468,14 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
if (error)
return error;
- if (is_quota_modification(mnt_userns, inode, attr)) {
+ if (is_quota_modification(idmap, inode, attr)) {
error = dquot_initialize(inode);
if (error)
return error;
}
- if (i_uid_needs_update(mnt_userns, attr, inode) ||
- i_gid_needs_update(mnt_userns, attr, inode)) {
+ if (i_uid_needs_update(idmap, attr, inode) ||
+ i_gid_needs_update(idmap, attr, inode)) {
handle_t *handle;
/* (user+group)*(old+new) structure, inode write (sb,
@@ -5490,7 +5492,7 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
* counts xattr inode references.
*/
down_read(&EXT4_I(inode)->xattr_sem);
- error = dquot_transfer(mnt_userns, inode, attr);
+ error = dquot_transfer(idmap, inode, attr);
up_read(&EXT4_I(inode)->xattr_sem);
if (error) {
@@ -5499,8 +5501,8 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
}
/* Update corresponding info in inode so that everything is in
* one transaction */
- i_uid_update(mnt_userns, attr, inode);
- i_gid_update(mnt_userns, attr, inode);
+ i_uid_update(idmap, attr, inode);
+ i_gid_update(idmap, attr, inode);
error = ext4_mark_inode_dirty(handle, inode);
ext4_journal_stop(handle);
if (unlikely(error)) {
@@ -5630,7 +5632,7 @@ out_mmap_sem:
if (!error) {
if (inc_ivers)
inode_inc_iversion(inode);
- setattr_copy(mnt_userns, inode, attr);
+ setattr_copy(idmap, inode, attr);
mark_inode_dirty(inode);
}
@@ -5642,7 +5644,7 @@ out_mmap_sem:
ext4_orphan_del(NULL, inode);
if (!error && (ia_valid & ATTR_MODE))
- rc = posix_acl_chmod(mnt_userns, dentry, inode->i_mode);
+ rc = posix_acl_chmod(idmap, dentry, inode->i_mode);
err_out:
if (error)
@@ -5668,7 +5670,7 @@ u32 ext4_dio_alignment(struct inode *inode)
return 1; /* use the iomap defaults */
}
-int ext4_getattr(struct user_namespace *mnt_userns, const struct path *path,
+int ext4_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
@@ -5725,18 +5727,18 @@ int ext4_getattr(struct user_namespace *mnt_userns, const struct path *path,
STATX_ATTR_NODUMP |
STATX_ATTR_VERITY);
- generic_fillattr(mnt_userns, inode, stat);
+ generic_fillattr(idmap, inode, stat);
return 0;
}
-int ext4_file_getattr(struct user_namespace *mnt_userns,
+int ext4_file_getattr(struct mnt_idmap *idmap,
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
u64 delalloc_blocks;
- ext4_getattr(mnt_userns, path, stat, request_mask, query_flags);
+ ext4_getattr(idmap, path, stat, request_mask, query_flags);
/*
* If there is inline data in the inode, the inode will normally not
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 8067ccda34e4..b0dc7212694e 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -358,12 +358,12 @@ void ext4_reset_inode_seed(struct inode *inode)
* important fields of the inodes.
*
* @sb: the super block of the filesystem
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @inode: the inode to swap with EXT4_BOOT_LOADER_INO
*
*/
static long swap_inode_boot_loader(struct super_block *sb,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct inode *inode)
{
handle_t *handle;
@@ -393,7 +393,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
}
if (IS_RDONLY(inode) || IS_APPEND(inode) || IS_IMMUTABLE(inode) ||
- !inode_owner_or_capable(mnt_userns, inode) ||
+ !inode_owner_or_capable(idmap, inode) ||
!capable(CAP_SYS_ADMIN)) {
err = -EPERM;
goto journal_err_out;
@@ -979,7 +979,7 @@ int ext4_fileattr_get(struct dentry *dentry, struct fileattr *fa)
return 0;
}
-int ext4_fileattr_set(struct user_namespace *mnt_userns,
+int ext4_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa)
{
struct inode *inode = d_inode(dentry);
@@ -1217,7 +1217,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
struct super_block *sb = inode->i_sb;
- struct user_namespace *mnt_userns = file_mnt_user_ns(filp);
+ struct mnt_idmap *idmap = file_mnt_idmap(filp);
ext4_debug("cmd = %u, arg = %lu\n", cmd, arg);
@@ -1234,7 +1234,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
__u32 generation;
int err;
- if (!inode_owner_or_capable(mnt_userns, inode))
+ if (!inode_owner_or_capable(idmap, inode))
return -EPERM;
if (ext4_has_metadata_csum(inode->i_sb)) {
@@ -1376,7 +1376,7 @@ mext_out:
case EXT4_IOC_MIGRATE:
{
int err;
- if (!inode_owner_or_capable(mnt_userns, inode))
+ if (!inode_owner_or_capable(idmap, inode))
return -EACCES;
err = mnt_want_write_file(filp);
@@ -1398,7 +1398,7 @@ mext_out:
case EXT4_IOC_ALLOC_DA_BLKS:
{
int err;
- if (!inode_owner_or_capable(mnt_userns, inode))
+ if (!inode_owner_or_capable(idmap, inode))
return -EACCES;
err = mnt_want_write_file(filp);
@@ -1417,7 +1417,7 @@ mext_out:
err = mnt_want_write_file(filp);
if (err)
return err;
- err = swap_inode_boot_loader(sb, mnt_userns, inode);
+ err = swap_inode_boot_loader(sb, idmap, inode);
mnt_drop_write_file(filp);
return err;
}
@@ -1542,7 +1542,7 @@ resizefs_out:
case EXT4_IOC_CLEAR_ES_CACHE:
{
- if (!inode_owner_or_capable(mnt_userns, inode))
+ if (!inode_owner_or_capable(idmap, inode))
return -EACCES;
ext4_clear_inode_es(inode);
return 0;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index dd28453d6ea3..d10a508d95cd 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2792,7 +2792,7 @@ static int ext4_add_nondir(handle_t *handle,
* If the create succeeds, we fill in the inode information
* with d_instantiate().
*/
-static int ext4_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int ext4_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
handle_t *handle;
@@ -2806,7 +2806,7 @@ static int ext4_create(struct user_namespace *mnt_userns, struct inode *dir,
credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
retry:
- inode = ext4_new_inode_start_handle(mnt_userns, dir, mode, &dentry->d_name,
+ inode = ext4_new_inode_start_handle(idmap, dir, mode, &dentry->d_name,
0, NULL, EXT4_HT_DIR, credits);
handle = ext4_journal_current_handle();
err = PTR_ERR(inode);
@@ -2827,7 +2827,7 @@ retry:
return err;
}
-static int ext4_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+static int ext4_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t rdev)
{
handle_t *handle;
@@ -2841,7 +2841,7 @@ static int ext4_mknod(struct user_namespace *mnt_userns, struct inode *dir,
credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
retry:
- inode = ext4_new_inode_start_handle(mnt_userns, dir, mode, &dentry->d_name,
+ inode = ext4_new_inode_start_handle(idmap, dir, mode, &dentry->d_name,
0, NULL, EXT4_HT_DIR, credits);
handle = ext4_journal_current_handle();
err = PTR_ERR(inode);
@@ -2861,7 +2861,7 @@ retry:
return err;
}
-static int ext4_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+static int ext4_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
struct file *file, umode_t mode)
{
handle_t *handle;
@@ -2873,7 +2873,7 @@ static int ext4_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
return err;
retry:
- inode = ext4_new_inode_start_handle(mnt_userns, dir, mode,
+ inode = ext4_new_inode_start_handle(idmap, dir, mode,
NULL, 0, NULL,
EXT4_HT_DIR,
EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
@@ -2972,7 +2972,7 @@ out:
return err;
}
-static int ext4_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int ext4_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
handle_t *handle;
@@ -2989,7 +2989,7 @@ static int ext4_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
retry:
- inode = ext4_new_inode_start_handle(mnt_userns, dir, S_IFDIR | mode,
+ inode = ext4_new_inode_start_handle(idmap, dir, S_IFDIR | mode,
&dentry->d_name,
0, NULL, EXT4_HT_DIR, credits);
handle = ext4_journal_current_handle();
@@ -3339,7 +3339,7 @@ out:
return err;
}
-static int ext4_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+static int ext4_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *symname)
{
handle_t *handle;
@@ -3370,7 +3370,7 @@ static int ext4_symlink(struct user_namespace *mnt_userns, struct inode *dir,
credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3;
retry:
- inode = ext4_new_inode_start_handle(mnt_userns, dir, S_IFLNK|S_IRWXUGO,
+ inode = ext4_new_inode_start_handle(idmap, dir, S_IFLNK|S_IRWXUGO,
&dentry->d_name, 0, NULL,
EXT4_HT_DIR, credits);
handle = ext4_journal_current_handle();
@@ -3720,7 +3720,7 @@ static void ext4_update_dir_count(handle_t *handle, struct ext4_renament *ent)
}
}
-static struct inode *ext4_whiteout_for_rename(struct user_namespace *mnt_userns,
+static struct inode *ext4_whiteout_for_rename(struct mnt_idmap *idmap,
struct ext4_renament *ent,
int credits, handle_t **h)
{
@@ -3735,7 +3735,7 @@ static struct inode *ext4_whiteout_for_rename(struct user_namespace *mnt_userns,
credits += (EXT4_MAXQUOTAS_TRANS_BLOCKS(ent->dir->i_sb) +
EXT4_XATTR_TRANS_BLOCKS + 4);
retry:
- wh = ext4_new_inode_start_handle(mnt_userns, ent->dir,
+ wh = ext4_new_inode_start_handle(idmap, ent->dir,
S_IFCHR | WHITEOUT_MODE,
&ent->dentry->d_name, 0, NULL,
EXT4_HT_DIR, credits);
@@ -3763,7 +3763,7 @@ retry:
* while new_{dentry,inode) refers to the destination dentry/inode
* This comes from rename(const char *oldpath, const char *newpath)
*/
-static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags)
{
@@ -3851,7 +3851,7 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
goto release_bh;
}
} else {
- whiteout = ext4_whiteout_for_rename(mnt_userns, &old, credits, &handle);
+ whiteout = ext4_whiteout_for_rename(idmap, &old, credits, &handle);
if (IS_ERR(whiteout)) {
retval = PTR_ERR(whiteout);
goto release_bh;
@@ -4158,7 +4158,7 @@ end_rename:
return retval;
}
-static int ext4_rename2(struct user_namespace *mnt_userns,
+static int ext4_rename2(struct mnt_idmap *idmap,
struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
@@ -4181,7 +4181,7 @@ static int ext4_rename2(struct user_namespace *mnt_userns,
new_dir, new_dentry);
}
- return ext4_rename(mnt_userns, old_dir, old_dentry, new_dir, new_dentry, flags);
+ return ext4_rename(idmap, old_dir, old_dentry, new_dir, new_dentry, flags);
}
/*
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index d5266932ce6c..c61dc8a7c014 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -211,8 +211,7 @@ static void ext4_set_bio_post_read_ctx(struct bio *bio,
static inline loff_t ext4_readpage_limit(struct inode *inode)
{
- if (IS_ENABLED(CONFIG_FS_VERITY) &&
- (IS_VERITY(inode) || ext4_verity_in_progress(inode)))
+ if (IS_ENABLED(CONFIG_FS_VERITY) && IS_VERITY(inode))
return inode->i_sb->s_maxbytes;
return i_size_read(inode);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 260c1b3e3ef2..2ae46d11aa30 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2635,7 +2635,6 @@ static int ext4_check_test_dummy_encryption(const struct fs_context *fc,
{
const struct ext4_fs_context *ctx = fc->fs_private;
const struct ext4_sb_info *sbi = EXT4_SB(sb);
- int err;
if (!fscrypt_is_dummy_policy_set(&ctx->dummy_enc_policy))
return 0;
@@ -2668,17 +2667,7 @@ static int ext4_check_test_dummy_encryption(const struct fs_context *fc,
"Conflicting test_dummy_encryption options");
return -EINVAL;
}
- /*
- * fscrypt_add_test_dummy_key() technically changes the super_block, so
- * technically it should be delayed until ext4_apply_options() like the
- * other changes. But since we never get here for remounts (see above),
- * and this is the last chance to report errors, we do it here.
- */
- err = fscrypt_add_test_dummy_key(sb, &ctx->dummy_enc_policy);
- if (err)
- ext4_msg(NULL, KERN_WARNING,
- "Error adding test dummy encryption key [%d]", err);
- return err;
+ return 0;
}
static void ext4_apply_test_dummy_encryption(struct ext4_fs_context *ctx,
@@ -5336,11 +5325,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
}
}
- if (ext4_has_feature_verity(sb) && sb->s_blocksize != PAGE_SIZE) {
- ext4_msg(sb, KERN_ERR, "Unsupported blocksize for fs-verity");
- goto failed_mount_wq;
- }
-
/*
* Get the # of file system overhead blocks from the
* superblock if present.
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index 3d3ed3c38f56..75bf1f88843c 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -55,12 +55,12 @@ static const char *ext4_encrypted_get_link(struct dentry *dentry,
return paddr;
}
-static int ext4_encrypted_symlink_getattr(struct user_namespace *mnt_userns,
+static int ext4_encrypted_symlink_getattr(struct mnt_idmap *idmap,
const struct path *path,
struct kstat *stat, u32 request_mask,
unsigned int query_flags)
{
- ext4_getattr(mnt_userns, path, stat, request_mask, query_flags);
+ ext4_getattr(idmap, path, stat, request_mask, query_flags);
return fscrypt_symlink_getattr(path, stat);
}
diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c
index 30e3b65798b5..e4da1704438e 100644
--- a/fs/ext4/verity.c
+++ b/fs/ext4/verity.c
@@ -381,11 +381,11 @@ static struct page *ext4_read_merkle_tree_page(struct inode *inode,
}
static int ext4_write_merkle_tree_block(struct inode *inode, const void *buf,
- u64 index, int log_blocksize)
+ u64 pos, unsigned int size)
{
- loff_t pos = ext4_verity_metadata_pos(inode) + (index << log_blocksize);
+ pos += ext4_verity_metadata_pos(inode);
- return pagecache_write(inode, buf, 1 << log_blocksize, pos);
+ return pagecache_write(inode, buf, size, pos);
}
const struct fsverity_operations ext4_verityops = {
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 7decaaf27e82..a2f04a3808db 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -81,6 +81,8 @@ ext4_xattr_block_cache_find(struct inode *, struct ext4_xattr_header *,
struct mb_cache_entry **);
static __le32 ext4_xattr_hash_entry(char *name, size_t name_len, __le32 *value,
size_t value_count);
+static __le32 ext4_xattr_hash_entry_signed(char *name, size_t name_len, __le32 *value,
+ size_t value_count);
static void ext4_xattr_rehash(struct ext4_xattr_header *);
static const struct xattr_handler * const ext4_xattr_handler_map[] = {
@@ -470,8 +472,22 @@ ext4_xattr_inode_verify_hashes(struct inode *ea_inode,
tmp_data = cpu_to_le32(hash);
e_hash = ext4_xattr_hash_entry(entry->e_name, entry->e_name_len,
&tmp_data, 1);
+ /* All good? */
+ if (e_hash == entry->e_hash)
+ return 0;
+
+ /*
+ * Not good. Maybe the entry hash was calculated
+ * using the buggy signed char version?
+ */
+ e_hash = ext4_xattr_hash_entry_signed(entry->e_name, entry->e_name_len,
+ &tmp_data, 1);
+ /* Still no match - bad */
if (e_hash != entry->e_hash)
return -EFSCORRUPTED;
+
+ /* Let people know about old hash */
+ pr_warn_once("ext4: filesystem with signed xattr name hash");
}
return 0;
}
@@ -3081,7 +3097,29 @@ static __le32 ext4_xattr_hash_entry(char *name, size_t name_len, __le32 *value,
while (name_len--) {
hash = (hash << NAME_HASH_SHIFT) ^
(hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
- *name++;
+ (unsigned char)*name++;
+ }
+ while (value_count--) {
+ hash = (hash << VALUE_HASH_SHIFT) ^
+ (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
+ le32_to_cpu(*value++);
+ }
+ return cpu_to_le32(hash);
+}
+
+/*
+ * ext4_xattr_hash_entry_signed()
+ *
+ * Compute the hash of an extended attribute incorrectly.
+ */
+static __le32 ext4_xattr_hash_entry_signed(char *name, size_t name_len, __le32 *value, size_t value_count)
+{
+ __u32 hash = 0;
+
+ while (name_len--) {
+ hash = (hash << NAME_HASH_SHIFT) ^
+ (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
+ (signed char)*name++;
}
while (value_count--) {
hash = (hash << VALUE_HASH_SHIFT) ^
diff --git a/fs/ext4/xattr_hurd.c b/fs/ext4/xattr_hurd.c
index c78df5790377..8a5842e4cd95 100644
--- a/fs/ext4/xattr_hurd.c
+++ b/fs/ext4/xattr_hurd.c
@@ -32,7 +32,7 @@ ext4_xattr_hurd_get(const struct xattr_handler *handler,
static int
ext4_xattr_hurd_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index 8213f66f7b2d..776cf11d24ca 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -23,7 +23,7 @@ ext4_xattr_security_get(const struct xattr_handler *handler,
static int
ext4_xattr_security_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c
index 7c21ffb26d25..9811eb0ab276 100644
--- a/fs/ext4/xattr_trusted.c
+++ b/fs/ext4/xattr_trusted.c
@@ -30,7 +30,7 @@ ext4_xattr_trusted_get(const struct xattr_handler *handler,
static int
ext4_xattr_trusted_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c
index 2fe7ff0a479c..4b70bf4e7626 100644
--- a/fs/ext4/xattr_user.c
+++ b/fs/ext4/xattr_user.c
@@ -31,7 +31,7 @@ ext4_xattr_user_get(const struct xattr_handler *handler,
static int
ext4_xattr_user_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index c1c74aa658ae..ec2aeccb69a3 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -204,7 +204,7 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type, bool rcu)
return __f2fs_get_acl(inode, type, NULL);
}
-static int f2fs_acl_update_mode(struct user_namespace *mnt_userns,
+static int f2fs_acl_update_mode(struct mnt_idmap *idmap,
struct inode *inode, umode_t *mode_p,
struct posix_acl **acl)
{
@@ -219,14 +219,14 @@ static int f2fs_acl_update_mode(struct user_namespace *mnt_userns,
return error;
if (error == 0)
*acl = NULL;
- if (!vfsgid_in_group_p(i_gid_into_vfsgid(mnt_userns, inode)) &&
- !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
+ if (!vfsgid_in_group_p(i_gid_into_vfsgid(idmap, inode)) &&
+ !capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID))
mode &= ~S_ISGID;
*mode_p = mode;
return 0;
}
-static int __f2fs_set_acl(struct user_namespace *mnt_userns,
+static int __f2fs_set_acl(struct mnt_idmap *idmap,
struct inode *inode, int type,
struct posix_acl *acl, struct page *ipage)
{
@@ -240,7 +240,7 @@ static int __f2fs_set_acl(struct user_namespace *mnt_userns,
case ACL_TYPE_ACCESS:
name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
if (acl && !ipage) {
- error = f2fs_acl_update_mode(mnt_userns, inode,
+ error = f2fs_acl_update_mode(idmap, inode,
&mode, &acl);
if (error)
return error;
@@ -276,7 +276,7 @@ static int __f2fs_set_acl(struct user_namespace *mnt_userns,
return error;
}
-int f2fs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+int f2fs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type)
{
struct inode *inode = d_inode(dentry);
@@ -284,7 +284,7 @@ int f2fs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
return -EIO;
- return __f2fs_set_acl(mnt_userns, inode, type, acl, NULL);
+ return __f2fs_set_acl(idmap, inode, type, acl, NULL);
}
/*
diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h
index ea2bbb3f264b..94ebfbfbdc6f 100644
--- a/fs/f2fs/acl.h
+++ b/fs/f2fs/acl.h
@@ -34,7 +34,7 @@ struct f2fs_acl_header {
#ifdef CONFIG_F2FS_FS_POSIX_ACL
extern struct posix_acl *f2fs_get_acl(struct inode *, int, bool);
-extern int f2fs_set_acl(struct user_namespace *, struct dentry *,
+extern int f2fs_set_acl(struct mnt_idmap *, struct dentry *,
struct posix_acl *, int);
extern int f2fs_init_acl(struct inode *, struct inode *, struct page *,
struct page *);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 6e43e19c7d1c..8630df80fedb 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2053,8 +2053,7 @@ out:
static inline loff_t f2fs_readpage_limit(struct inode *inode)
{
- if (IS_ENABLED(CONFIG_FS_VERITY) &&
- (IS_VERITY(inode) || f2fs_verity_in_progress(inode)))
+ if (IS_ENABLED(CONFIG_FS_VERITY) && IS_VERITY(inode))
return inode->i_sb->s_maxbytes;
return i_size_read(inode);
@@ -2183,7 +2182,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
sector_t last_block_in_file;
const unsigned blocksize = blks_to_bytes(inode, 1);
struct decompress_io_ctx *dic = NULL;
- struct extent_info ei = {0, };
+ struct extent_info ei = {};
bool from_dnode = true;
int i;
int ret = 0;
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 1bd38a78ebba..342af24b2f8c 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -546,7 +546,8 @@ static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
struct extent_node *en;
bool ret = false;
- f2fs_bug_on(sbi, !et);
+ if (!et)
+ return false;
trace_f2fs_lookup_extent_tree_start(inode, pgofs, type);
@@ -881,12 +882,14 @@ static unsigned long long __calculate_block_age(unsigned long long new,
}
/* This returns a new age and allocated blocks in ei */
-static int __get_new_block_age(struct inode *inode, struct extent_info *ei)
+static int __get_new_block_age(struct inode *inode, struct extent_info *ei,
+ block_t blkaddr)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
loff_t f_size = i_size_read(inode);
unsigned long long cur_blocks =
atomic64_read(&sbi->allocated_data_blocks);
+ struct extent_info tei = *ei; /* only fofs and len are valid */
/*
* When I/O is not aligned to a PAGE_SIZE, update will happen to the last
@@ -894,20 +897,20 @@ static int __get_new_block_age(struct inode *inode, struct extent_info *ei)
* block here.
*/
if ((f_size >> PAGE_SHIFT) == ei->fofs && f_size & (PAGE_SIZE - 1) &&
- ei->blk == NEW_ADDR)
+ blkaddr == NEW_ADDR)
return -EINVAL;
- if (__lookup_extent_tree(inode, ei->fofs, ei, EX_BLOCK_AGE)) {
+ if (__lookup_extent_tree(inode, ei->fofs, &tei, EX_BLOCK_AGE)) {
unsigned long long cur_age;
- if (cur_blocks >= ei->last_blocks)
- cur_age = cur_blocks - ei->last_blocks;
+ if (cur_blocks >= tei.last_blocks)
+ cur_age = cur_blocks - tei.last_blocks;
else
/* allocated_data_blocks overflow */
- cur_age = ULLONG_MAX - ei->last_blocks + cur_blocks;
+ cur_age = ULLONG_MAX - tei.last_blocks + cur_blocks;
- if (ei->age)
- ei->age = __calculate_block_age(cur_age, ei->age);
+ if (tei.age)
+ ei->age = __calculate_block_age(cur_age, tei.age);
else
ei->age = cur_age;
ei->last_blocks = cur_blocks;
@@ -915,14 +918,14 @@ static int __get_new_block_age(struct inode *inode, struct extent_info *ei)
return 0;
}
- f2fs_bug_on(sbi, ei->blk == NULL_ADDR);
+ f2fs_bug_on(sbi, blkaddr == NULL_ADDR);
/* the data block was allocated for the first time */
- if (ei->blk == NEW_ADDR)
+ if (blkaddr == NEW_ADDR)
goto out;
- if (__is_valid_data_blkaddr(ei->blk) &&
- !f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC_ENHANCE)) {
+ if (__is_valid_data_blkaddr(blkaddr) &&
+ !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
f2fs_bug_on(sbi, 1);
return -EINVAL;
}
@@ -938,7 +941,7 @@ out:
static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type)
{
- struct extent_info ei;
+ struct extent_info ei = {};
if (!__may_extent_tree(dn->inode, type))
return;
@@ -953,8 +956,7 @@ static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type typ
else
ei.blk = dn->data_blkaddr;
} else if (type == EX_BLOCK_AGE) {
- ei.blk = dn->data_blkaddr;
- if (__get_new_block_age(dn->inode, &ei))
+ if (__get_new_block_age(dn->inode, &ei, dn->data_blkaddr))
return;
}
__update_extent_tree_range(dn->inode, &ei, type);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e8953c3dc81a..9a3ffa39ad30 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3469,15 +3469,15 @@ void f2fs_truncate_data_blocks(struct dnode_of_data *dn);
int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock);
int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock);
int f2fs_truncate(struct inode *inode);
-int f2fs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+int f2fs_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int flags);
-int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr);
int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end);
void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count);
int f2fs_precache_extents(struct inode *inode);
int f2fs_fileattr_get(struct dentry *dentry, struct fileattr *fa);
-int f2fs_fileattr_set(struct user_namespace *mnt_userns,
+int f2fs_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa);
long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
@@ -3505,7 +3505,7 @@ void f2fs_handle_failed_inode(struct inode *inode);
int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name,
bool hot, bool set);
struct dentry *f2fs_get_parent(struct dentry *child);
-int f2fs_get_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+int f2fs_get_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
struct inode **new_inode);
/*
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index a6c401279886..b90617639743 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -837,7 +837,7 @@ static bool f2fs_force_buffered_io(struct inode *inode, int rw)
return false;
}
-int f2fs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+int f2fs_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
@@ -892,7 +892,7 @@ int f2fs_getattr(struct user_namespace *mnt_userns, const struct path *path,
STATX_ATTR_NODUMP |
STATX_ATTR_VERITY);
- generic_fillattr(mnt_userns, inode, stat);
+ generic_fillattr(idmap, inode, stat);
/* we need to show initial sectors used for inline_data/dentries */
if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) ||
@@ -903,13 +903,13 @@ int f2fs_getattr(struct user_namespace *mnt_userns, const struct path *path,
}
#ifdef CONFIG_F2FS_FS_POSIX_ACL
-static void __setattr_copy(struct user_namespace *mnt_userns,
+static void __setattr_copy(struct mnt_idmap *idmap,
struct inode *inode, const struct iattr *attr)
{
unsigned int ia_valid = attr->ia_valid;
- i_uid_update(mnt_userns, attr, inode);
- i_gid_update(mnt_userns, attr, inode);
+ i_uid_update(idmap, attr, inode);
+ i_gid_update(idmap, attr, inode);
if (ia_valid & ATTR_ATIME)
inode->i_atime = attr->ia_atime;
if (ia_valid & ATTR_MTIME)
@@ -918,10 +918,10 @@ static void __setattr_copy(struct user_namespace *mnt_userns,
inode->i_ctime = attr->ia_ctime;
if (ia_valid & ATTR_MODE) {
umode_t mode = attr->ia_mode;
- vfsgid_t vfsgid = i_gid_into_vfsgid(mnt_userns, inode);
+ vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode);
if (!vfsgid_in_group_p(vfsgid) &&
- !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
+ !capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID))
mode &= ~S_ISGID;
set_acl_inode(inode, mode);
}
@@ -930,7 +930,7 @@ static void __setattr_copy(struct user_namespace *mnt_userns,
#define __setattr_copy setattr_copy
#endif
-int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
@@ -951,7 +951,7 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
!f2fs_is_compress_backend_ready(inode))
return -EOPNOTSUPP;
- err = setattr_prepare(mnt_userns, dentry, attr);
+ err = setattr_prepare(idmap, dentry, attr);
if (err)
return err;
@@ -963,15 +963,15 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
if (err)
return err;
- if (is_quota_modification(mnt_userns, inode, attr)) {
+ if (is_quota_modification(idmap, inode, attr)) {
err = f2fs_dquot_initialize(inode);
if (err)
return err;
}
- if (i_uid_needs_update(mnt_userns, attr, inode) ||
- i_gid_needs_update(mnt_userns, attr, inode)) {
+ if (i_uid_needs_update(idmap, attr, inode) ||
+ i_gid_needs_update(idmap, attr, inode)) {
f2fs_lock_op(F2FS_I_SB(inode));
- err = dquot_transfer(mnt_userns, inode, attr);
+ err = dquot_transfer(idmap, inode, attr);
if (err) {
set_sbi_flag(F2FS_I_SB(inode),
SBI_QUOTA_NEED_REPAIR);
@@ -982,8 +982,8 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
* update uid/gid under lock_op(), so that dquot and inode can
* be updated atomically.
*/
- i_uid_update(mnt_userns, attr, inode);
- i_gid_update(mnt_userns, attr, inode);
+ i_uid_update(idmap, attr, inode);
+ i_gid_update(idmap, attr, inode);
f2fs_mark_inode_dirty_sync(inode, true);
f2fs_unlock_op(F2FS_I_SB(inode));
}
@@ -1023,10 +1023,10 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
spin_unlock(&F2FS_I(inode)->i_size_lock);
}
- __setattr_copy(mnt_userns, inode, attr);
+ __setattr_copy(idmap, inode, attr);
if (attr->ia_valid & ATTR_MODE) {
- err = posix_acl_chmod(mnt_userns, dentry, f2fs_get_inode_mode(inode));
+ err = posix_acl_chmod(idmap, dentry, f2fs_get_inode_mode(inode));
if (is_inode_flag_set(inode, FI_ACL_MODE)) {
if (!err)
@@ -2038,14 +2038,14 @@ static int f2fs_ioc_getversion(struct file *filp, unsigned long arg)
static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
{
struct inode *inode = file_inode(filp);
- struct user_namespace *mnt_userns = file_mnt_user_ns(filp);
+ struct mnt_idmap *idmap = file_mnt_idmap(filp);
struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct inode *pinode;
loff_t isize;
int ret;
- if (!inode_owner_or_capable(mnt_userns, inode))
+ if (!inode_owner_or_capable(idmap, inode))
return -EACCES;
if (!S_ISREG(inode->i_mode))
@@ -2095,7 +2095,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
goto out;
}
- ret = f2fs_get_tmpfile(mnt_userns, pinode, &fi->cow_inode);
+ ret = f2fs_get_tmpfile(idmap, pinode, &fi->cow_inode);
iput(pinode);
if (ret) {
f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
@@ -2135,10 +2135,10 @@ out:
static int f2fs_ioc_commit_atomic_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
- struct user_namespace *mnt_userns = file_mnt_user_ns(filp);
+ struct mnt_idmap *idmap = file_mnt_idmap(filp);
int ret;
- if (!inode_owner_or_capable(mnt_userns, inode))
+ if (!inode_owner_or_capable(idmap, inode))
return -EACCES;
ret = mnt_want_write_file(filp);
@@ -2167,10 +2167,10 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
static int f2fs_ioc_abort_atomic_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
- struct user_namespace *mnt_userns = file_mnt_user_ns(filp);
+ struct mnt_idmap *idmap = file_mnt_idmap(filp);
int ret;
- if (!inode_owner_or_capable(mnt_userns, inode))
+ if (!inode_owner_or_capable(idmap, inode))
return -EACCES;
ret = mnt_want_write_file(filp);
@@ -2559,7 +2559,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
struct f2fs_map_blocks map = { .m_next_extent = NULL,
.m_seg_type = NO_CHECK_TYPE,
.m_may_create = false };
- struct extent_info ei = {0, };
+ struct extent_info ei = {};
pgoff_t pg_start, pg_end, next_pgofs;
unsigned int blk_per_seg = sbi->blocks_per_seg;
unsigned int total = 0, sec_num;
@@ -3090,7 +3090,7 @@ int f2fs_fileattr_get(struct dentry *dentry, struct fileattr *fa)
return 0;
}
-int f2fs_fileattr_set(struct user_namespace *mnt_userns,
+int f2fs_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa)
{
struct inode *inode = d_inode(dentry);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 6032589099ce..d8e01bbbf27f 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -202,7 +202,7 @@ static void set_file_temperature(struct f2fs_sb_info *sbi, struct inode *inode,
file_set_hot(inode);
}
-static struct inode *f2fs_new_inode(struct user_namespace *mnt_userns,
+static struct inode *f2fs_new_inode(struct mnt_idmap *idmap,
struct inode *dir, umode_t mode,
const char *name)
{
@@ -225,7 +225,7 @@ static struct inode *f2fs_new_inode(struct user_namespace *mnt_userns,
nid_free = true;
- inode_init_owner(mnt_userns, inode, dir, mode);
+ inode_init_owner(idmap, inode, dir, mode);
inode->i_ino = ino;
inode->i_blocks = 0;
@@ -246,7 +246,7 @@ static struct inode *f2fs_new_inode(struct user_namespace *mnt_userns,
(F2FS_I(dir)->i_flags & F2FS_PROJINHERIT_FL))
F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid;
else
- F2FS_I(inode)->i_projid = make_kprojid(mnt_userns,
+ F2FS_I(inode)->i_projid = make_kprojid(&init_user_ns,
F2FS_DEF_PROJID);
err = fscrypt_prepare_new_inode(dir, inode, &encrypt);
@@ -333,7 +333,7 @@ fail_drop:
return ERR_PTR(err);
}
-static int f2fs_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int f2fs_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
@@ -350,7 +350,7 @@ static int f2fs_create(struct user_namespace *mnt_userns, struct inode *dir,
if (err)
return err;
- inode = f2fs_new_inode(mnt_userns, dir, mode, dentry->d_name.name);
+ inode = f2fs_new_inode(idmap, dir, mode, dentry->d_name.name);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -659,7 +659,7 @@ static const char *f2fs_get_link(struct dentry *dentry,
return link;
}
-static int f2fs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+static int f2fs_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *symname)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
@@ -682,7 +682,7 @@ static int f2fs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
if (err)
return err;
- inode = f2fs_new_inode(mnt_userns, dir, S_IFLNK | S_IRWXUGO, NULL);
+ inode = f2fs_new_inode(idmap, dir, S_IFLNK | S_IRWXUGO, NULL);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -739,7 +739,7 @@ out_free_encrypted_link:
return err;
}
-static int f2fs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int f2fs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
@@ -753,7 +753,7 @@ static int f2fs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
if (err)
return err;
- inode = f2fs_new_inode(mnt_userns, dir, S_IFDIR | mode, NULL);
+ inode = f2fs_new_inode(idmap, dir, S_IFDIR | mode, NULL);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -794,7 +794,7 @@ static int f2fs_rmdir(struct inode *dir, struct dentry *dentry)
return -ENOTEMPTY;
}
-static int f2fs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+static int f2fs_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
@@ -810,7 +810,7 @@ static int f2fs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
if (err)
return err;
- inode = f2fs_new_inode(mnt_userns, dir, mode, NULL);
+ inode = f2fs_new_inode(idmap, dir, mode, NULL);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -837,7 +837,7 @@ out:
return err;
}
-static int __f2fs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+static int __f2fs_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
struct file *file, umode_t mode, bool is_whiteout,
struct inode **new_inode)
{
@@ -849,7 +849,7 @@ static int __f2fs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
if (err)
return err;
- inode = f2fs_new_inode(mnt_userns, dir, mode, NULL);
+ inode = f2fs_new_inode(idmap, dir, mode, NULL);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -907,7 +907,7 @@ out:
return err;
}
-static int f2fs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+static int f2fs_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
struct file *file, umode_t mode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
@@ -918,28 +918,28 @@ static int f2fs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
if (!f2fs_is_checkpoint_ready(sbi))
return -ENOSPC;
- err = __f2fs_tmpfile(mnt_userns, dir, file, mode, false, NULL);
+ err = __f2fs_tmpfile(idmap, dir, file, mode, false, NULL);
return finish_open_simple(file, err);
}
-static int f2fs_create_whiteout(struct user_namespace *mnt_userns,
+static int f2fs_create_whiteout(struct mnt_idmap *idmap,
struct inode *dir, struct inode **whiteout)
{
if (unlikely(f2fs_cp_error(F2FS_I_SB(dir))))
return -EIO;
- return __f2fs_tmpfile(mnt_userns, dir, NULL,
+ return __f2fs_tmpfile(idmap, dir, NULL,
S_IFCHR | WHITEOUT_MODE, true, whiteout);
}
-int f2fs_get_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+int f2fs_get_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
struct inode **new_inode)
{
- return __f2fs_tmpfile(mnt_userns, dir, NULL, S_IFREG, false, new_inode);
+ return __f2fs_tmpfile(idmap, dir, NULL, S_IFREG, false, new_inode);
}
-static int f2fs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags)
{
@@ -979,7 +979,7 @@ static int f2fs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
}
if (flags & RENAME_WHITEOUT) {
- err = f2fs_create_whiteout(mnt_userns, old_dir, &whiteout);
+ err = f2fs_create_whiteout(idmap, old_dir, &whiteout);
if (err)
return err;
}
@@ -1295,7 +1295,7 @@ out:
return err;
}
-static int f2fs_rename2(struct user_namespace *mnt_userns,
+static int f2fs_rename2(struct mnt_idmap *idmap,
struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
@@ -1318,7 +1318,7 @@ static int f2fs_rename2(struct user_namespace *mnt_userns,
* VFS has already handled the new dentry existence case,
* here, we just deal with "RENAME_NOREPLACE" as regular rename.
*/
- return f2fs_rename(mnt_userns, old_dir, old_dentry,
+ return f2fs_rename(idmap, old_dir, old_dentry,
new_dir, new_dentry, flags);
}
@@ -1342,12 +1342,12 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry,
return target;
}
-static int f2fs_encrypted_symlink_getattr(struct user_namespace *mnt_userns,
+static int f2fs_encrypted_symlink_getattr(struct mnt_idmap *idmap,
const struct path *path,
struct kstat *stat, u32 request_mask,
unsigned int query_flags)
{
- f2fs_getattr(mnt_userns, path, stat, request_mask, query_flags);
+ f2fs_getattr(idmap, path, stat, request_mask, query_flags);
return fscrypt_symlink_getattr(path, stat);
}
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 77fd453949b1..dfd41908b12d 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -258,15 +258,15 @@ static int recover_quota_data(struct inode *inode, struct page *page)
attr.ia_vfsuid = VFSUIDT_INIT(make_kuid(inode->i_sb->s_user_ns, i_uid));
attr.ia_vfsgid = VFSGIDT_INIT(make_kgid(inode->i_sb->s_user_ns, i_gid));
- if (!vfsuid_eq(attr.ia_vfsuid, i_uid_into_vfsuid(&init_user_ns, inode)))
+ if (!vfsuid_eq(attr.ia_vfsuid, i_uid_into_vfsuid(&nop_mnt_idmap, inode)))
attr.ia_valid |= ATTR_UID;
- if (!vfsgid_eq(attr.ia_vfsgid, i_gid_into_vfsgid(&init_user_ns, inode)))
+ if (!vfsgid_eq(attr.ia_vfsgid, i_gid_into_vfsgid(&nop_mnt_idmap, inode)))
attr.ia_valid |= ATTR_GID;
if (!attr.ia_valid)
return 0;
- err = dquot_transfer(&init_user_ns, inode, &attr);
+ err = dquot_transfer(&nop_mnt_idmap, inode, &attr);
if (err)
set_sbi_flag(F2FS_I_SB(inode), SBI_QUOTA_NEED_REPAIR);
return err;
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 25ddea478fc1..ae3c4e5474ef 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -663,8 +663,7 @@ init_thread:
if (IS_ERR(fcc->f2fs_issue_flush)) {
int err = PTR_ERR(fcc->f2fs_issue_flush);
- kfree(fcc);
- SM_I(sbi)->fcc_info = NULL;
+ fcc->f2fs_issue_flush = NULL;
return err;
}
@@ -3161,7 +3160,7 @@ static int __get_segment_type_4(struct f2fs_io_info *fio)
static int __get_age_segment_type(struct inode *inode, pgoff_t pgofs)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct extent_info ei;
+ struct extent_info ei = {};
if (f2fs_lookup_age_extent_cache(inode, pgofs, &ei)) {
if (!ei.age)
@@ -5138,11 +5137,9 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
init_f2fs_rwsem(&sm_info->curseg_lock);
- if (!f2fs_readonly(sbi->sb)) {
- err = f2fs_create_flush_cmd_control(sbi);
- if (err)
- return err;
- }
+ err = f2fs_create_flush_cmd_control(sbi);
+ if (err)
+ return err;
err = create_discard_cmd_control(sbi);
if (err)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 1f812b9ce985..64d3556d61a5 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -540,12 +540,6 @@ static int f2fs_set_test_dummy_encryption(struct super_block *sb,
opt, err);
return -EINVAL;
}
- err = fscrypt_add_test_dummy_key(sb, policy);
- if (err) {
- f2fs_warn(sbi, "Error adding test dummy encryption key [%d]",
- err);
- return err;
- }
f2fs_warn(sbi, "Test dummy encryption mode enabled");
return 0;
}
diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c
index c352fff88a5e..f320ed8172ec 100644
--- a/fs/f2fs/verity.c
+++ b/fs/f2fs/verity.c
@@ -276,11 +276,11 @@ static struct page *f2fs_read_merkle_tree_page(struct inode *inode,
}
static int f2fs_write_merkle_tree_block(struct inode *inode, const void *buf,
- u64 index, int log_blocksize)
+ u64 pos, unsigned int size)
{
- loff_t pos = f2fs_verity_metadata_pos(inode) + (index << log_blocksize);
+ pos += f2fs_verity_metadata_pos(inode);
- return pagecache_write(inode, buf, 1 << log_blocksize, pos);
+ return pagecache_write(inode, buf, size, pos);
}
const struct fsverity_operations f2fs_verityops = {
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index dc2e8637189e..d92edbbdc30e 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -65,7 +65,7 @@ static int f2fs_xattr_generic_get(const struct xattr_handler *handler,
}
static int f2fs_xattr_generic_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
@@ -109,7 +109,7 @@ static int f2fs_xattr_advise_get(const struct xattr_handler *handler,
}
static int f2fs_xattr_advise_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
@@ -117,7 +117,7 @@ static int f2fs_xattr_advise_set(const struct xattr_handler *handler,
unsigned char old_advise = F2FS_I(inode)->i_advise;
unsigned char new_advise;
- if (!inode_owner_or_capable(&init_user_ns, inode))
+ if (!inode_owner_or_capable(&nop_mnt_idmap, inode))
return -EPERM;
if (value == NULL)
return -EINVAL;
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index a415c02ede39..e3b690b48e3e 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -398,10 +398,10 @@ extern long fat_generic_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg);
extern const struct file_operations fat_file_operations;
extern const struct inode_operations fat_file_inode_operations;
-extern int fat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+extern int fat_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr);
extern void fat_truncate_blocks(struct inode *inode, loff_t offset);
-extern int fat_getattr(struct user_namespace *mnt_userns,
+extern int fat_getattr(struct mnt_idmap *idmap,
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags);
extern int fat_file_fsync(struct file *file, loff_t start, loff_t end,
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 8a6b493b5b5f..795a4fad5c40 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -90,13 +90,13 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
* out the RO attribute for checking by the security
* module, just because it maps to a file mode.
*/
- err = security_inode_setattr(file_mnt_user_ns(file),
+ err = security_inode_setattr(file_mnt_idmap(file),
file->f_path.dentry, &ia);
if (err)
goto out_unlock_inode;
/* This MUST be done before doing anything irreversible... */
- err = fat_setattr(file_mnt_user_ns(file), file->f_path.dentry, &ia);
+ err = fat_setattr(file_mnt_idmap(file), file->f_path.dentry, &ia);
if (err)
goto out_unlock_inode;
@@ -395,13 +395,13 @@ void fat_truncate_blocks(struct inode *inode, loff_t offset)
fat_flush_inodes(inode->i_sb, inode, NULL);
}
-int fat_getattr(struct user_namespace *mnt_userns, const struct path *path,
+int fat_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int flags)
{
struct inode *inode = d_inode(path->dentry);
struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
- generic_fillattr(mnt_userns, inode, stat);
+ generic_fillattr(idmap, inode, stat);
stat->blksize = sbi->cluster_size;
if (sbi->options.nfs == FAT_NFS_NOSTALE_RO) {
@@ -456,14 +456,14 @@ static int fat_sanitize_mode(const struct msdos_sb_info *sbi,
return 0;
}
-static int fat_allow_set_time(struct user_namespace *mnt_userns,
+static int fat_allow_set_time(struct mnt_idmap *idmap,
struct msdos_sb_info *sbi, struct inode *inode)
{
umode_t allow_utime = sbi->options.allow_utime;
- if (!vfsuid_eq_kuid(i_uid_into_vfsuid(mnt_userns, inode),
+ if (!vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode),
current_fsuid())) {
- if (vfsgid_in_group_p(i_gid_into_vfsgid(mnt_userns, inode)))
+ if (vfsgid_in_group_p(i_gid_into_vfsgid(idmap, inode)))
allow_utime >>= 3;
if (allow_utime & MAY_WRITE)
return 1;
@@ -477,7 +477,7 @@ static int fat_allow_set_time(struct user_namespace *mnt_userns,
/* valid file mode bits */
#define FAT_VALID_MODE (S_IFREG | S_IFDIR | S_IRWXUGO)
-int fat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int fat_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
@@ -488,11 +488,11 @@ int fat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
/* Check for setting the inode time. */
ia_valid = attr->ia_valid;
if (ia_valid & TIMES_SET_FLAGS) {
- if (fat_allow_set_time(mnt_userns, sbi, inode))
+ if (fat_allow_set_time(idmap, sbi, inode))
attr->ia_valid &= ~TIMES_SET_FLAGS;
}
- error = setattr_prepare(mnt_userns, dentry, attr);
+ error = setattr_prepare(idmap, dentry, attr);
attr->ia_valid = ia_valid;
if (error) {
if (sbi->options.quiet)
@@ -518,10 +518,10 @@ int fat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
}
if (((attr->ia_valid & ATTR_UID) &&
- (!uid_eq(from_vfsuid(mnt_userns, i_user_ns(inode), attr->ia_vfsuid),
+ (!uid_eq(from_vfsuid(idmap, i_user_ns(inode), attr->ia_vfsuid),
sbi->options.fs_uid))) ||
((attr->ia_valid & ATTR_GID) &&
- (!gid_eq(from_vfsgid(mnt_userns, i_user_ns(inode), attr->ia_vfsgid),
+ (!gid_eq(from_vfsgid(idmap, i_user_ns(inode), attr->ia_vfsgid),
sbi->options.fs_gid))) ||
((attr->ia_valid & ATTR_MODE) &&
(attr->ia_mode & ~FAT_VALID_MODE)))
@@ -564,7 +564,7 @@ int fat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
fat_truncate_time(inode, &attr->ia_mtime, S_MTIME);
attr->ia_valid &= ~(ATTR_ATIME|ATTR_CTIME|ATTR_MTIME);
- setattr_copy(mnt_userns, inode, attr);
+ setattr_copy(idmap, inode, attr);
mark_inode_dirty(inode);
out:
return error;
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index efba301d68ae..2116c486843b 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -261,7 +261,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
}
/***** Create a file */
-static int msdos_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int msdos_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
struct super_block *sb = dir->i_sb;
@@ -339,7 +339,7 @@ out:
}
/***** Make a directory */
-static int msdos_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int msdos_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
struct super_block *sb = dir->i_sb;
@@ -594,7 +594,7 @@ error_inode:
}
/***** Rename, a wrapper for rename_same_dir & rename_diff_dir */
-static int msdos_rename(struct user_namespace *mnt_userns,
+static int msdos_rename(struct mnt_idmap *idmap,
struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 21620054e1c4..fceda1de4805 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -756,7 +756,7 @@ error:
return ERR_PTR(err);
}
-static int vfat_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int vfat_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
struct super_block *sb = dir->i_sb;
@@ -844,7 +844,7 @@ out:
return err;
}
-static int vfat_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int vfat_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
struct super_block *sb = dir->i_sb;
@@ -1158,7 +1158,7 @@ error_exchange:
goto out;
}
-static int vfat_rename2(struct user_namespace *mnt_userns, struct inode *old_dir,
+static int vfat_rename2(struct mnt_idmap *idmap, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags)
{
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 146c9ab0cd4b..b622be119706 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -10,6 +10,7 @@
#include <linux/mm.h>
#include <linux/sched/task.h>
#include <linux/fs.h>
+#include <linux/filelock.h>
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/capability.h>
@@ -47,7 +48,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
/* O_NOATIME can only be set by the owner or superuser */
if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
- if (!inode_owner_or_capable(file_mnt_user_ns(filp), inode))
+ if (!inode_owner_or_capable(file_mnt_idmap(filp), inode))
return -EPERM;
/* required for strict SunOS emulation */
diff --git a/fs/file_table.c b/fs/file_table.c
index dd88701e54a9..372653b92617 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -13,6 +13,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
+#include <linux/filelock.h>
#include <linux/security.h>
#include <linux/cred.h>
#include <linux/eventpoll.h>
diff --git a/fs/freevxfs/Kconfig b/fs/freevxfs/Kconfig
index c05c71d57291..0e2fc08f7de4 100644
--- a/fs/freevxfs/Kconfig
+++ b/fs/freevxfs/Kconfig
@@ -8,7 +8,7 @@ config VXFS_FS
of SCO UnixWare (and possibly others) and optionally available
for Sunsoft Solaris, HP-UX and many other operating systems. However
these particular OS implementations of vxfs may differ in on-disk
- data endianess and/or superblock offset. The vxfs module has been
+ data endianness and/or superblock offset. The vxfs module has been
tested with SCO UnixWare and HP-UX B.10.20 (pa-risc 1.1 arch.)
Currently only readonly access is supported and VxFX versions
2, 3 and 4. Tests were performed with HP-UX VxFS version 3.
diff --git a/fs/fscache/volume.c b/fs/fscache/volume.c
index ab8ceddf9efa..cdf991bdd9de 100644
--- a/fs/fscache/volume.c
+++ b/fs/fscache/volume.c
@@ -141,13 +141,14 @@ static bool fscache_is_acquire_pending(struct fscache_volume *volume)
static void fscache_wait_on_volume_collision(struct fscache_volume *candidate,
unsigned int collidee_debug_id)
{
- wait_var_event_timeout(&candidate->flags,
- !fscache_is_acquire_pending(candidate), 20 * HZ);
+ wait_on_bit_timeout(&candidate->flags, FSCACHE_VOLUME_ACQUIRE_PENDING,
+ TASK_UNINTERRUPTIBLE, 20 * HZ);
if (fscache_is_acquire_pending(candidate)) {
pr_notice("Potential volume collision new=%08x old=%08x",
candidate->debug_id, collidee_debug_id);
fscache_stat(&fscache_n_volumes_collision);
- wait_var_event(&candidate->flags, !fscache_is_acquire_pending(candidate));
+ wait_on_bit(&candidate->flags, FSCACHE_VOLUME_ACQUIRE_PENDING,
+ TASK_UNINTERRUPTIBLE);
}
}
@@ -279,8 +280,7 @@ static void fscache_create_volume_work(struct work_struct *work)
fscache_end_cache_access(volume->cache,
fscache_access_acquire_volume_end);
- clear_bit_unlock(FSCACHE_VOLUME_CREATING, &volume->flags);
- wake_up_bit(&volume->flags, FSCACHE_VOLUME_CREATING);
+ clear_and_wake_up_bit(FSCACHE_VOLUME_CREATING, &volume->flags);
fscache_put_volume(volume, fscache_volume_put_create_work);
}
@@ -347,8 +347,8 @@ static void fscache_wake_pending_volume(struct fscache_volume *volume,
hlist_bl_for_each_entry(cursor, p, h, hash_link) {
if (fscache_volume_same(cursor, volume)) {
fscache_see_volume(cursor, fscache_volume_see_hash_wake);
- clear_bit(FSCACHE_VOLUME_ACQUIRE_PENDING, &cursor->flags);
- wake_up_bit(&cursor->flags, FSCACHE_VOLUME_ACQUIRE_PENDING);
+ clear_and_wake_up_bit(FSCACHE_VOLUME_ACQUIRE_PENDING,
+ &cursor->flags);
return;
}
}
diff --git a/fs/fuse/acl.c b/fs/fuse/acl.c
index a4850aee2639..3d192b80a561 100644
--- a/fs/fuse/acl.c
+++ b/fs/fuse/acl.c
@@ -11,9 +11,10 @@
#include <linux/posix_acl.h>
#include <linux/posix_acl_xattr.h>
-struct posix_acl *fuse_get_acl(struct inode *inode, int type, bool rcu)
+static struct posix_acl *__fuse_get_acl(struct fuse_conn *fc,
+ struct mnt_idmap *idmap,
+ struct inode *inode, int type, bool rcu)
{
- struct fuse_conn *fc = get_fuse_conn(inode);
int size;
const char *name;
void *value = NULL;
@@ -25,7 +26,7 @@ struct posix_acl *fuse_get_acl(struct inode *inode, int type, bool rcu)
if (fuse_is_bad(inode))
return ERR_PTR(-EIO);
- if (!fc->posix_acl || fc->no_getxattr)
+ if (fc->no_getxattr)
return NULL;
if (type == ACL_TYPE_ACCESS)
@@ -53,7 +54,47 @@ struct posix_acl *fuse_get_acl(struct inode *inode, int type, bool rcu)
return acl;
}
-int fuse_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+static inline bool fuse_no_acl(const struct fuse_conn *fc,
+ const struct inode *inode)
+{
+ /*
+ * Refuse interacting with POSIX ACLs for daemons that
+ * don't support FUSE_POSIX_ACL and are not mounted on
+ * the host to retain backwards compatibility.
+ */
+ return !fc->posix_acl && (i_user_ns(inode) != &init_user_ns);
+}
+
+struct posix_acl *fuse_get_acl(struct mnt_idmap *idmap,
+ struct dentry *dentry, int type)
+{
+ struct inode *inode = d_inode(dentry);
+ struct fuse_conn *fc = get_fuse_conn(inode);
+
+ if (fuse_no_acl(fc, inode))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ return __fuse_get_acl(fc, idmap, inode, type, false);
+}
+
+struct posix_acl *fuse_get_inode_acl(struct inode *inode, int type, bool rcu)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+
+ /*
+ * FUSE daemons before FUSE_POSIX_ACL was introduced could get and set
+ * POSIX ACLs without them being used for permission checking by the
+ * vfs. Retain that behavior for backwards compatibility as there are
+ * filesystems that do all permission checking for acls in the daemon
+ * and not in the kernel.
+ */
+ if (!fc->posix_acl)
+ return NULL;
+
+ return __fuse_get_acl(fc, &nop_mnt_idmap, inode, type, rcu);
+}
+
+int fuse_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type)
{
struct inode *inode = d_inode(dentry);
@@ -64,7 +105,7 @@ int fuse_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
if (fuse_is_bad(inode))
return -EIO;
- if (!fc->posix_acl || fc->no_setxattr)
+ if (fc->no_setxattr || fuse_no_acl(fc, inode))
return -EOPNOTSUPP;
if (type == ACL_TYPE_ACCESS)
@@ -99,8 +140,14 @@ int fuse_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
return ret;
}
- if (!vfsgid_in_group_p(i_gid_into_vfsgid(&init_user_ns, inode)) &&
- !capable_wrt_inode_uidgid(&init_user_ns, inode, CAP_FSETID))
+ /*
+ * Fuse daemons without FUSE_POSIX_ACL never changed the passed
+ * through POSIX ACLs. Such daemons don't expect setgid bits to
+ * be stripped.
+ */
+ if (fc->posix_acl &&
+ !vfsgid_in_group_p(i_gid_into_vfsgid(&nop_mnt_idmap, inode)) &&
+ !capable_wrt_inode_uidgid(&nop_mnt_idmap, inode, CAP_FSETID))
extra_flags |= FUSE_SETXATTR_ACL_KILL_SGID;
ret = fuse_setxattr(inode, name, value, size, 0, extra_flags);
@@ -108,8 +155,15 @@ int fuse_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
} else {
ret = fuse_removexattr(inode, name);
}
- forget_all_cached_acls(inode);
- fuse_invalidate_attr(inode);
+
+ if (fc->posix_acl) {
+ /*
+ * Fuse daemons without FUSE_POSIX_ACL never cached POSIX ACLs
+ * and didn't invalidate attributes. Retain that behavior.
+ */
+ forget_all_cached_acls(inode);
+ fuse_invalidate_attr(inode);
+ }
return ret;
}
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index cd1a071b625a..cd1eae61e84c 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -645,7 +645,7 @@ out_err:
return err;
}
-static int fuse_mknod(struct user_namespace *, struct inode *, struct dentry *,
+static int fuse_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
umode_t, dev_t);
static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
struct file *file, unsigned flags,
@@ -686,7 +686,7 @@ out_dput:
return err;
mknod:
- err = fuse_mknod(&init_user_ns, dir, entry, mode, 0);
+ err = fuse_mknod(&nop_mnt_idmap, dir, entry, mode, 0);
if (err)
goto out_dput;
no_open:
@@ -773,7 +773,7 @@ static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args,
return err;
}
-static int fuse_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+static int fuse_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *entry, umode_t mode, dev_t rdev)
{
struct fuse_mknod_in inarg;
@@ -796,13 +796,13 @@ static int fuse_mknod(struct user_namespace *mnt_userns, struct inode *dir,
return create_new_entry(fm, &args, dir, entry, mode);
}
-static int fuse_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int fuse_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *entry, umode_t mode, bool excl)
{
- return fuse_mknod(&init_user_ns, dir, entry, mode, 0);
+ return fuse_mknod(&nop_mnt_idmap, dir, entry, mode, 0);
}
-static int fuse_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+static int fuse_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
struct file *file, umode_t mode)
{
struct fuse_conn *fc = get_fuse_conn(dir);
@@ -819,7 +819,7 @@ static int fuse_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
return err;
}
-static int fuse_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int fuse_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *entry, umode_t mode)
{
struct fuse_mkdir_in inarg;
@@ -841,7 +841,7 @@ static int fuse_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
return create_new_entry(fm, &args, dir, entry, S_IFDIR);
}
-static int fuse_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+static int fuse_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *entry, const char *link)
{
struct fuse_mount *fm = get_fuse_mount(dir);
@@ -998,7 +998,7 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
return err;
}
-static int fuse_rename2(struct user_namespace *mnt_userns, struct inode *olddir,
+static int fuse_rename2(struct mnt_idmap *idmap, struct inode *olddir,
struct dentry *oldent, struct inode *newdir,
struct dentry *newent, unsigned int flags)
{
@@ -1156,7 +1156,7 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file,
forget_all_cached_acls(inode);
err = fuse_do_getattr(inode, stat, file);
} else if (stat) {
- generic_fillattr(&init_user_ns, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, inode, stat);
stat->mode = fi->orig_i_mode;
stat->ino = fi->orig_ino;
}
@@ -1326,7 +1326,7 @@ static int fuse_perm_getattr(struct inode *inode, int mask)
* access request is sent. Execute permission is still checked
* locally based on file mode.
*/
-static int fuse_permission(struct user_namespace *mnt_userns,
+static int fuse_permission(struct mnt_idmap *idmap,
struct inode *inode, int mask)
{
struct fuse_conn *fc = get_fuse_conn(inode);
@@ -1358,7 +1358,7 @@ static int fuse_permission(struct user_namespace *mnt_userns,
}
if (fc->default_permissions) {
- err = generic_permission(&init_user_ns, inode, mask);
+ err = generic_permission(&nop_mnt_idmap, inode, mask);
/* If permission is denied, try to refresh file
attributes. This is also needed, because the root
@@ -1366,7 +1366,7 @@ static int fuse_permission(struct user_namespace *mnt_userns,
if (err == -EACCES && !refreshed) {
err = fuse_perm_getattr(inode, mask);
if (!err)
- err = generic_permission(&init_user_ns,
+ err = generic_permission(&nop_mnt_idmap,
inode, mask);
}
@@ -1690,7 +1690,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
if (!fc->default_permissions)
attr->ia_valid |= ATTR_FORCE;
- err = setattr_prepare(&init_user_ns, dentry, attr);
+ err = setattr_prepare(&nop_mnt_idmap, dentry, attr);
if (err)
return err;
@@ -1837,7 +1837,7 @@ error:
return err;
}
-static int fuse_setattr(struct user_namespace *mnt_userns, struct dentry *entry,
+static int fuse_setattr(struct mnt_idmap *idmap, struct dentry *entry,
struct iattr *attr)
{
struct inode *inode = d_inode(entry);
@@ -1900,7 +1900,7 @@ static int fuse_setattr(struct user_namespace *mnt_userns, struct dentry *entry,
return ret;
}
-static int fuse_getattr(struct user_namespace *mnt_userns,
+static int fuse_getattr(struct mnt_idmap *idmap,
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags)
{
@@ -1942,7 +1942,8 @@ static const struct inode_operations fuse_dir_inode_operations = {
.permission = fuse_permission,
.getattr = fuse_getattr,
.listxattr = fuse_listxattr,
- .get_inode_acl = fuse_get_acl,
+ .get_inode_acl = fuse_get_inode_acl,
+ .get_acl = fuse_get_acl,
.set_acl = fuse_set_acl,
.fileattr_get = fuse_fileattr_get,
.fileattr_set = fuse_fileattr_set,
@@ -1964,7 +1965,8 @@ static const struct inode_operations fuse_common_inode_operations = {
.permission = fuse_permission,
.getattr = fuse_getattr,
.listxattr = fuse_listxattr,
- .get_inode_acl = fuse_get_acl,
+ .get_inode_acl = fuse_get_inode_acl,
+ .get_acl = fuse_get_acl,
.set_acl = fuse_set_acl,
.fileattr_get = fuse_fileattr_get,
.fileattr_set = fuse_fileattr_set,
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 875314ee6f59..82710d103556 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -18,6 +18,7 @@
#include <linux/falloc.h>
#include <linux/uio.h>
#include <linux/fs.h>
+#include <linux/filelock.h>
static int fuse_send_open(struct fuse_mount *fm, u64 nodeid,
unsigned int open_flags, int opcode,
@@ -1313,7 +1314,8 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
return err;
if (fc->handle_killpriv_v2 &&
- setattr_should_drop_suidgid(&init_user_ns, file_inode(file))) {
+ setattr_should_drop_suidgid(&nop_mnt_idmap,
+ file_inode(file))) {
goto writethrough;
}
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index c673faefdcb9..9b5058cf5bc3 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -1264,12 +1264,12 @@ ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value,
ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size);
int fuse_removexattr(struct inode *inode, const char *name);
extern const struct xattr_handler *fuse_xattr_handlers[];
-extern const struct xattr_handler *fuse_acl_xattr_handlers[];
-extern const struct xattr_handler *fuse_no_acl_xattr_handlers[];
struct posix_acl;
-struct posix_acl *fuse_get_acl(struct inode *inode, int type, bool rcu);
-int fuse_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+struct posix_acl *fuse_get_inode_acl(struct inode *inode, int type, bool rcu);
+struct posix_acl *fuse_get_acl(struct mnt_idmap *idmap,
+ struct dentry *dentry, int type);
+int fuse_set_acl(struct mnt_idmap *, struct dentry *dentry,
struct posix_acl *acl, int type);
/* readdir.c */
@@ -1309,7 +1309,7 @@ long fuse_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
long fuse_file_compat_ioctl(struct file *file, unsigned int cmd,
unsigned long arg);
int fuse_fileattr_get(struct dentry *dentry, struct fileattr *fa);
-int fuse_fileattr_set(struct user_namespace *mnt_userns,
+int fuse_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa);
/* file.c */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 6b3beda16c1b..de9b9ec5ce81 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -311,7 +311,8 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
fuse_dax_dontcache(inode, attr->flags);
}
-static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
+static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr,
+ struct fuse_conn *fc)
{
inode->i_mode = attr->mode & S_IFMT;
inode->i_size = attr->size;
@@ -333,6 +334,12 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
new_decode_dev(attr->rdev));
} else
BUG();
+ /*
+ * Ensure that we don't cache acls for daemons without FUSE_POSIX_ACL
+ * so they see the exact same behavior as before.
+ */
+ if (!fc->posix_acl)
+ inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE;
}
static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
@@ -372,7 +379,7 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
if (!inode)
return NULL;
- fuse_init_inode(inode, attr);
+ fuse_init_inode(inode, attr, fc);
get_fuse_inode(inode)->nodeid = nodeid;
inode->i_flags |= S_AUTOMOUNT;
goto done;
@@ -388,7 +395,7 @@ retry:
if (!fc->writeback_cache || !S_ISREG(attr->mode))
inode->i_flags |= S_NOCMTIME;
inode->i_generation = generation;
- fuse_init_inode(inode, attr);
+ fuse_init_inode(inode, attr, fc);
unlock_new_inode(inode);
} else if (fuse_stale_inode(inode, generation, attr)) {
/* nodeid was reused, any I/O on the old inode should fail */
@@ -1174,7 +1181,6 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
if ((flags & FUSE_POSIX_ACL)) {
fc->default_permissions = 1;
fc->posix_acl = 1;
- fm->sb->s_xattr = fuse_acl_xattr_handlers;
}
if (flags & FUSE_CACHE_SYMLINKS)
fc->cache_symlinks = 1;
@@ -1420,13 +1426,6 @@ static void fuse_sb_defaults(struct super_block *sb)
if (sb->s_user_ns != &init_user_ns)
sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);
-
- /*
- * If we are not in the initial user namespace posix
- * acls must be translated.
- */
- if (sb->s_user_ns != &init_user_ns)
- sb->s_xattr = fuse_no_acl_xattr_handlers;
}
static int fuse_fill_super_submount(struct super_block *sb,
diff --git a/fs/fuse/ioctl.c b/fs/fuse/ioctl.c
index fcce94ace2c2..e50a18ee6cc6 100644
--- a/fs/fuse/ioctl.c
+++ b/fs/fuse/ioctl.c
@@ -467,7 +467,7 @@ cleanup:
return err;
}
-int fuse_fileattr_set(struct user_namespace *mnt_userns,
+int fuse_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa)
{
struct inode *inode = d_inode(dentry);
diff --git a/fs/fuse/xattr.c b/fs/fuse/xattr.c
index 0d3e7177fce0..49c01559580f 100644
--- a/fs/fuse/xattr.c
+++ b/fs/fuse/xattr.c
@@ -189,7 +189,7 @@ static int fuse_xattr_get(const struct xattr_handler *handler,
}
static int fuse_xattr_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *dentry, struct inode *inode,
const char *name, const void *value, size_t size,
int flags)
@@ -203,27 +203,6 @@ static int fuse_xattr_set(const struct xattr_handler *handler,
return fuse_setxattr(inode, name, value, size, flags, 0);
}
-static bool no_xattr_list(struct dentry *dentry)
-{
- return false;
-}
-
-static int no_xattr_get(const struct xattr_handler *handler,
- struct dentry *dentry, struct inode *inode,
- const char *name, void *value, size_t size)
-{
- return -EOPNOTSUPP;
-}
-
-static int no_xattr_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
- struct dentry *dentry, struct inode *nodee,
- const char *name, const void *value,
- size_t size, int flags)
-{
- return -EOPNOTSUPP;
-}
-
static const struct xattr_handler fuse_xattr_handler = {
.prefix = "",
.get = fuse_xattr_get,
@@ -234,33 +213,3 @@ const struct xattr_handler *fuse_xattr_handlers[] = {
&fuse_xattr_handler,
NULL
};
-
-const struct xattr_handler *fuse_acl_xattr_handlers[] = {
- &posix_acl_access_xattr_handler,
- &posix_acl_default_xattr_handler,
- &fuse_xattr_handler,
- NULL
-};
-
-static const struct xattr_handler fuse_no_acl_access_xattr_handler = {
- .name = XATTR_NAME_POSIX_ACL_ACCESS,
- .flags = ACL_TYPE_ACCESS,
- .list = no_xattr_list,
- .get = no_xattr_get,
- .set = no_xattr_set,
-};
-
-static const struct xattr_handler fuse_no_acl_default_xattr_handler = {
- .name = XATTR_NAME_POSIX_ACL_DEFAULT,
- .flags = ACL_TYPE_ACCESS,
- .list = no_xattr_list,
- .get = no_xattr_get,
- .set = no_xattr_set,
-};
-
-const struct xattr_handler *fuse_no_acl_xattr_handlers[] = {
- &fuse_no_acl_access_xattr_handler,
- &fuse_no_acl_default_xattr_handler,
- &fuse_xattr_handler,
- NULL
-};
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 3dcde4912413..a392aa0f041d 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -109,7 +109,7 @@ out:
return error;
}
-int gfs2_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+int gfs2_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type)
{
struct inode *inode = d_inode(dentry);
@@ -135,7 +135,7 @@ int gfs2_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
mode = inode->i_mode;
if (type == ACL_TYPE_ACCESS && acl) {
- ret = posix_acl_update_mode(&init_user_ns, inode, &mode, &acl);
+ ret = posix_acl_update_mode(&nop_mnt_idmap, inode, &mode, &acl);
if (ret)
goto unlock;
}
diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h
index b8de8c148f5c..d4deb2b19959 100644
--- a/fs/gfs2/acl.h
+++ b/fs/gfs2/acl.h
@@ -13,7 +13,7 @@
extern struct posix_acl *gfs2_get_acl(struct inode *inode, int type, bool rcu);
extern int __gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type);
-extern int gfs2_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+extern int gfs2_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type);
#endif /* __ACL_DOT_H__ */
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index eea5be4fbf0e..300844f50dcd 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -15,6 +15,7 @@
#include <linux/mm.h>
#include <linux/mount.h>
#include <linux/fs.h>
+#include <linux/filelock.h>
#include <linux/gfs2_ondisk.h>
#include <linux/falloc.h>
#include <linux/swap.h>
@@ -235,7 +236,7 @@ static int do_gfs2_set_flags(struct inode *inode, u32 reqflags, u32 mask)
goto out;
if (!IS_IMMUTABLE(inode)) {
- error = gfs2_permission(&init_user_ns, inode, MAY_WRITE);
+ error = gfs2_permission(&nop_mnt_idmap, inode, MAY_WRITE);
if (error)
goto out;
}
@@ -273,7 +274,7 @@ out:
return error;
}
-int gfs2_fileattr_set(struct user_namespace *mnt_userns,
+int gfs2_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa)
{
struct inode *inode = d_inode(dentry);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 614db3055c02..713efa3bb732 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -320,7 +320,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
}
if (!is_root) {
- error = gfs2_permission(&init_user_ns, dir, MAY_EXEC);
+ error = gfs2_permission(&nop_mnt_idmap, dir, MAY_EXEC);
if (error)
goto out;
}
@@ -350,7 +350,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
{
int error;
- error = gfs2_permission(&init_user_ns, &dip->i_inode,
+ error = gfs2_permission(&nop_mnt_idmap, &dip->i_inode,
MAY_WRITE | MAY_EXEC);
if (error)
return error;
@@ -843,7 +843,7 @@ fail:
/**
* gfs2_create - Create a file
- * @mnt_userns: User namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @dir: The directory in which to create the file
* @dentry: The dentry of the new file
* @mode: The mode of the new file
@@ -852,7 +852,7 @@ fail:
* Returns: errno
*/
-static int gfs2_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int gfs2_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
return gfs2_create_inode(dir, dentry, NULL, S_IFREG | mode, 0, NULL, 0, excl);
@@ -960,7 +960,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
if (inode->i_nlink == 0)
goto out_gunlock;
- error = gfs2_permission(&init_user_ns, dir, MAY_WRITE | MAY_EXEC);
+ error = gfs2_permission(&nop_mnt_idmap, dir, MAY_WRITE | MAY_EXEC);
if (error)
goto out_gunlock;
@@ -1078,7 +1078,7 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
if (IS_APPEND(&dip->i_inode))
return -EPERM;
- error = gfs2_permission(&init_user_ns, &dip->i_inode,
+ error = gfs2_permission(&nop_mnt_idmap, &dip->i_inode,
MAY_WRITE | MAY_EXEC);
if (error)
return error;
@@ -1207,7 +1207,7 @@ out_inodes:
/**
* gfs2_symlink - Create a symlink
- * @mnt_userns: User namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @dir: The directory to create the symlink in
* @dentry: The dentry to put the symlink in
* @symname: The thing which the link points to
@@ -1215,7 +1215,7 @@ out_inodes:
* Returns: errno
*/
-static int gfs2_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+static int gfs2_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *symname)
{
unsigned int size;
@@ -1229,7 +1229,7 @@ static int gfs2_symlink(struct user_namespace *mnt_userns, struct inode *dir,
/**
* gfs2_mkdir - Make a directory
- * @mnt_userns: User namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @dir: The parent directory of the new one
* @dentry: The dentry of the new directory
* @mode: The mode of the new directory
@@ -1237,7 +1237,7 @@ static int gfs2_symlink(struct user_namespace *mnt_userns, struct inode *dir,
* Returns: errno
*/
-static int gfs2_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int gfs2_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
unsigned dsize = gfs2_max_stuffed_size(GFS2_I(dir));
@@ -1246,7 +1246,7 @@ static int gfs2_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
/**
* gfs2_mknod - Make a special file
- * @mnt_userns: User namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @dir: The directory in which the special file will reside
* @dentry: The dentry of the special file
* @mode: The mode of the special file
@@ -1254,7 +1254,7 @@ static int gfs2_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
*
*/
-static int gfs2_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+static int gfs2_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t dev)
{
return gfs2_create_inode(dir, dentry, NULL, mode, dev, NULL, 0, 0);
@@ -1504,7 +1504,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
}
}
} else {
- error = gfs2_permission(&init_user_ns, ndir,
+ error = gfs2_permission(&nop_mnt_idmap, ndir,
MAY_WRITE | MAY_EXEC);
if (error)
goto out_gunlock;
@@ -1541,7 +1541,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
/* Check out the dir to be renamed */
if (dir_rename) {
- error = gfs2_permission(&init_user_ns, d_inode(odentry),
+ error = gfs2_permission(&nop_mnt_idmap, d_inode(odentry),
MAY_WRITE);
if (error)
goto out_gunlock;
@@ -1705,13 +1705,13 @@ static int gfs2_exchange(struct inode *odir, struct dentry *odentry,
goto out_gunlock;
if (S_ISDIR(old_mode)) {
- error = gfs2_permission(&init_user_ns, odentry->d_inode,
+ error = gfs2_permission(&nop_mnt_idmap, odentry->d_inode,
MAY_WRITE);
if (error)
goto out_gunlock;
}
if (S_ISDIR(new_mode)) {
- error = gfs2_permission(&init_user_ns, ndentry->d_inode,
+ error = gfs2_permission(&nop_mnt_idmap, ndentry->d_inode,
MAY_WRITE);
if (error)
goto out_gunlock;
@@ -1766,7 +1766,7 @@ out:
return error;
}
-static int gfs2_rename2(struct user_namespace *mnt_userns, struct inode *odir,
+static int gfs2_rename2(struct mnt_idmap *idmap, struct inode *odir,
struct dentry *odentry, struct inode *ndir,
struct dentry *ndentry, unsigned int flags)
{
@@ -1841,7 +1841,7 @@ out:
/**
* gfs2_permission
- * @mnt_userns: User namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @inode: The inode
* @mask: The mask to be tested
*
@@ -1852,7 +1852,7 @@ out:
* Returns: errno
*/
-int gfs2_permission(struct user_namespace *mnt_userns, struct inode *inode,
+int gfs2_permission(struct mnt_idmap *idmap, struct inode *inode,
int mask)
{
struct gfs2_inode *ip;
@@ -1872,7 +1872,7 @@ int gfs2_permission(struct user_namespace *mnt_userns, struct inode *inode,
if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
error = -EPERM;
else
- error = generic_permission(&init_user_ns, inode, mask);
+ error = generic_permission(&nop_mnt_idmap, inode, mask);
if (gfs2_holder_initialized(&i_gh))
gfs2_glock_dq_uninit(&i_gh);
@@ -1881,7 +1881,7 @@ int gfs2_permission(struct user_namespace *mnt_userns, struct inode *inode,
static int __gfs2_setattr_simple(struct inode *inode, struct iattr *attr)
{
- setattr_copy(&init_user_ns, inode, attr);
+ setattr_copy(&nop_mnt_idmap, inode, attr);
mark_inode_dirty(inode);
return 0;
}
@@ -1966,7 +1966,7 @@ out:
/**
* gfs2_setattr - Change attributes on an inode
- * @mnt_userns: User namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @dentry: The dentry which is changing
* @attr: The structure describing the change
*
@@ -1976,7 +1976,7 @@ out:
* Returns: errno
*/
-static int gfs2_setattr(struct user_namespace *mnt_userns,
+static int gfs2_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
@@ -1992,11 +1992,11 @@ static int gfs2_setattr(struct user_namespace *mnt_userns,
if (error)
goto out;
- error = may_setattr(&init_user_ns, inode, attr->ia_valid);
+ error = may_setattr(&nop_mnt_idmap, inode, attr->ia_valid);
if (error)
goto error;
- error = setattr_prepare(&init_user_ns, dentry, attr);
+ error = setattr_prepare(&nop_mnt_idmap, dentry, attr);
if (error)
goto error;
@@ -2007,7 +2007,7 @@ static int gfs2_setattr(struct user_namespace *mnt_userns,
else {
error = gfs2_setattr_simple(inode, attr);
if (!error && attr->ia_valid & ATTR_MODE)
- error = posix_acl_chmod(&init_user_ns, dentry,
+ error = posix_acl_chmod(&nop_mnt_idmap, dentry,
inode->i_mode);
}
@@ -2022,7 +2022,7 @@ out:
/**
* gfs2_getattr - Read out an inode's attributes
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @path: Object to query
* @stat: The inode's stats
* @request_mask: Mask of STATX_xxx flags indicating the caller's interests
@@ -2037,7 +2037,7 @@ out:
* Returns: errno
*/
-static int gfs2_getattr(struct user_namespace *mnt_userns,
+static int gfs2_getattr(struct mnt_idmap *idmap,
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags)
{
@@ -2066,7 +2066,7 @@ static int gfs2_getattr(struct user_namespace *mnt_userns,
STATX_ATTR_IMMUTABLE |
STATX_ATTR_NODUMP);
- generic_fillattr(&init_user_ns, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, inode, stat);
if (gfs2_holder_initialized(&gh))
gfs2_glock_dq_uninit(&gh);
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 0264d514dda7..c8c5814e7295 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -99,7 +99,7 @@ extern int gfs2_inode_refresh(struct gfs2_inode *ip);
extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
int is_root);
-extern int gfs2_permission(struct user_namespace *mnt_userns,
+extern int gfs2_permission(struct mnt_idmap *idmap,
struct inode *inode, int mask);
extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
@@ -111,7 +111,7 @@ extern const struct file_operations gfs2_file_fops_nolock;
extern const struct file_operations gfs2_dir_fops_nolock;
extern int gfs2_fileattr_get(struct dentry *dentry, struct fileattr *fa);
-extern int gfs2_fileattr_set(struct user_namespace *mnt_userns,
+extern int gfs2_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa);
extern void gfs2_set_inode_flags(struct inode *inode);
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 723639376ae2..61323deb80bc 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -80,6 +80,15 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
brelse(bd->bd_bh);
}
+static int __gfs2_writepage(struct page *page, struct writeback_control *wbc,
+ void *data)
+{
+ struct address_space *mapping = data;
+ int ret = mapping->a_ops->writepage(page, wbc);
+ mapping_set_error(mapping, ret);
+ return ret;
+}
+
/**
* gfs2_ail1_start_one - Start I/O on a transaction
* @sdp: The superblock
@@ -131,7 +140,7 @@ __acquires(&sdp->sd_ail_lock)
if (!mapping)
continue;
spin_unlock(&sdp->sd_ail_lock);
- ret = filemap_fdatawrite_wbc(mapping, wbc);
+ ret = write_cache_pages(mapping, wbc, __gfs2_writepage, mapping);
if (need_resched()) {
blk_finish_plug(plug);
cond_resched();
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 518c0677e12a..adf6d17cf033 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -1225,7 +1225,7 @@ int __gfs2_xattr_set(struct inode *inode, const char *name,
}
static int gfs2_xattr_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
diff --git a/fs/hfs/attr.c b/fs/hfs/attr.c
index 2bd54efaf416..6341bb248247 100644
--- a/fs/hfs/attr.c
+++ b/fs/hfs/attr.c
@@ -121,7 +121,7 @@ static int hfs_xattr_get(const struct xattr_handler *handler,
}
static int hfs_xattr_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *name, const void *value, size_t size,
int flags)
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 527f6e46cbe8..3e1e3dcf0b48 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -189,7 +189,7 @@ static int hfs_dir_release(struct inode *inode, struct file *file)
* a directory and return a corresponding inode, given the inode for
* the directory and the name (and its length) of the new file.
*/
-static int hfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int hfs_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
struct inode *inode;
@@ -219,7 +219,7 @@ static int hfs_create(struct user_namespace *mnt_userns, struct inode *dir,
* in a directory, given the inode for the parent directory and the
* name (and its length) of the new directory.
*/
-static int hfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int hfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
struct inode *inode;
@@ -280,7 +280,7 @@ static int hfs_remove(struct inode *dir, struct dentry *dentry)
* new file/directory.
* XXX: how do you handle must_be dir?
*/
-static int hfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+static int hfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags)
{
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 68d0305880f7..49d02524e667 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -206,7 +206,7 @@ int hfs_write_begin(struct file *file, struct address_space *mapping,
extern struct inode *hfs_new_inode(struct inode *, const struct qstr *, umode_t);
extern void hfs_inode_write_fork(struct inode *, struct hfs_extent *, __be32 *, __be32 *);
extern int hfs_write_inode(struct inode *, struct writeback_control *);
-extern int hfs_inode_setattr(struct user_namespace *, struct dentry *,
+extern int hfs_inode_setattr(struct mnt_idmap *, struct dentry *,
struct iattr *);
extern void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext,
__be32 log_size, __be32 phys_size, u32 clump_size);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 9c329a365e75..1f7bd068acf0 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -458,15 +458,16 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc)
/* panic? */
return -EIO;
+ res = -EIO;
if (HFS_I(main_inode)->cat_key.CName.len > HFS_NAMELEN)
- return -EIO;
+ goto out;
fd.search_key->cat = HFS_I(main_inode)->cat_key;
if (hfs_brec_find(&fd))
- /* panic? */
goto out;
if (S_ISDIR(main_inode->i_mode)) {
- WARN_ON(fd.entrylength < sizeof(struct hfs_cat_dir));
+ if (fd.entrylength < sizeof(struct hfs_cat_dir))
+ goto out;
hfs_bnode_read(fd.bnode, &rec, fd.entryoffset,
sizeof(struct hfs_cat_dir));
if (rec.type != HFS_CDR_DIR ||
@@ -479,6 +480,8 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc)
hfs_bnode_write(fd.bnode, &rec, fd.entryoffset,
sizeof(struct hfs_cat_dir));
} else if (HFS_IS_RSRC(inode)) {
+ if (fd.entrylength < sizeof(struct hfs_cat_file))
+ goto out;
hfs_bnode_read(fd.bnode, &rec, fd.entryoffset,
sizeof(struct hfs_cat_file));
hfs_inode_write_fork(inode, rec.file.RExtRec,
@@ -486,7 +489,8 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc)
hfs_bnode_write(fd.bnode, &rec, fd.entryoffset,
sizeof(struct hfs_cat_file));
} else {
- WARN_ON(fd.entrylength < sizeof(struct hfs_cat_file));
+ if (fd.entrylength < sizeof(struct hfs_cat_file))
+ goto out;
hfs_bnode_read(fd.bnode, &rec, fd.entryoffset,
sizeof(struct hfs_cat_file));
if (rec.type != HFS_CDR_FIL ||
@@ -503,9 +507,10 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc)
hfs_bnode_write(fd.bnode, &rec, fd.entryoffset,
sizeof(struct hfs_cat_file));
}
+ res = 0;
out:
hfs_find_exit(&fd);
- return 0;
+ return res;
}
static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry,
@@ -606,14 +611,14 @@ static int hfs_file_release(struct inode *inode, struct file *file)
* correspond to the same HFS file.
*/
-int hfs_inode_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int hfs_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
struct hfs_sb_info *hsb = HFS_SB(inode->i_sb);
int error;
- error = setattr_prepare(&init_user_ns, dentry,
+ error = setattr_prepare(&nop_mnt_idmap, dentry,
attr); /* basic permission checks */
if (error)
return error;
@@ -653,7 +658,7 @@ int hfs_inode_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
current_time(inode);
}
- setattr_copy(&init_user_ns, inode, attr);
+ setattr_copy(&nop_mnt_idmap, inode, attr);
mark_inode_dirty(inode);
return 0;
}
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 84714bbccc12..56fb5f1312e7 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -434,7 +434,7 @@ out:
return res;
}
-static int hfsplus_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+static int hfsplus_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *symname)
{
struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb);
@@ -476,7 +476,7 @@ out:
return res;
}
-static int hfsplus_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+static int hfsplus_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb);
@@ -517,19 +517,19 @@ out:
return res;
}
-static int hfsplus_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int hfsplus_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
- return hfsplus_mknod(&init_user_ns, dir, dentry, mode, 0);
+ return hfsplus_mknod(&nop_mnt_idmap, dir, dentry, mode, 0);
}
-static int hfsplus_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int hfsplus_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
- return hfsplus_mknod(&init_user_ns, dir, dentry, mode | S_IFDIR, 0);
+ return hfsplus_mknod(&nop_mnt_idmap, dir, dentry, mode | S_IFDIR, 0);
}
-static int hfsplus_rename(struct user_namespace *mnt_userns,
+static int hfsplus_rename(struct mnt_idmap *idmap,
struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 6aa919e59483..7ededcb720c1 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -481,13 +481,13 @@ void hfsplus_inode_write_fork(struct inode *inode,
struct hfsplus_fork_raw *fork);
int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd);
int hfsplus_cat_write_inode(struct inode *inode);
-int hfsplus_getattr(struct user_namespace *mnt_userns, const struct path *path,
+int hfsplus_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask,
unsigned int query_flags);
int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
int datasync);
int hfsplus_fileattr_get(struct dentry *dentry, struct fileattr *fa);
-int hfsplus_fileattr_set(struct user_namespace *mnt_userns,
+int hfsplus_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa);
/* ioctl.c */
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 840577a0c1e7..abb91f5fae92 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -246,13 +246,13 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
return 0;
}
-static int hfsplus_setattr(struct user_namespace *mnt_userns,
+static int hfsplus_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
int error;
- error = setattr_prepare(&init_user_ns, dentry, attr);
+ error = setattr_prepare(&nop_mnt_idmap, dentry, attr);
if (error)
return error;
@@ -270,13 +270,13 @@ static int hfsplus_setattr(struct user_namespace *mnt_userns,
inode->i_mtime = inode->i_ctime = current_time(inode);
}
- setattr_copy(&init_user_ns, inode, attr);
+ setattr_copy(&nop_mnt_idmap, inode, attr);
mark_inode_dirty(inode);
return 0;
}
-int hfsplus_getattr(struct user_namespace *mnt_userns, const struct path *path,
+int hfsplus_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask,
unsigned int query_flags)
{
@@ -298,7 +298,7 @@ int hfsplus_getattr(struct user_namespace *mnt_userns, const struct path *path,
stat->attributes_mask |= STATX_ATTR_APPEND | STATX_ATTR_IMMUTABLE |
STATX_ATTR_NODUMP;
- generic_fillattr(&init_user_ns, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, inode, stat);
return 0;
}
@@ -390,7 +390,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, struct inode *dir,
return NULL;
inode->i_ino = sbi->next_cnid++;
- inode_init_owner(&init_user_ns, inode, dir, mode);
+ inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
set_nlink(inode, 1);
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
@@ -655,7 +655,7 @@ int hfsplus_fileattr_get(struct dentry *dentry, struct fileattr *fa)
return 0;
}
-int hfsplus_fileattr_set(struct user_namespace *mnt_userns,
+int hfsplus_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa)
{
struct inode *inode = d_inode(dentry);
diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c
index 49891b12c415..5b476f57eb17 100644
--- a/fs/hfsplus/xattr.c
+++ b/fs/hfsplus/xattr.c
@@ -857,7 +857,7 @@ static int hfsplus_osx_getxattr(const struct xattr_handler *handler,
}
static int hfsplus_osx_setxattr(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *name, const void *buffer,
size_t size, int flags)
diff --git a/fs/hfsplus/xattr_security.c b/fs/hfsplus/xattr_security.c
index c1c7a16cbf21..90f68ec119cd 100644
--- a/fs/hfsplus/xattr_security.c
+++ b/fs/hfsplus/xattr_security.c
@@ -23,7 +23,7 @@ static int hfsplus_security_getxattr(const struct xattr_handler *handler,
}
static int hfsplus_security_setxattr(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *name, const void *buffer,
size_t size, int flags)
diff --git a/fs/hfsplus/xattr_trusted.c b/fs/hfsplus/xattr_trusted.c
index e150372ec564..fdbaebc1c49a 100644
--- a/fs/hfsplus/xattr_trusted.c
+++ b/fs/hfsplus/xattr_trusted.c
@@ -22,7 +22,7 @@ static int hfsplus_trusted_getxattr(const struct xattr_handler *handler,
}
static int hfsplus_trusted_setxattr(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *name, const void *buffer,
size_t size, int flags)
diff --git a/fs/hfsplus/xattr_user.c b/fs/hfsplus/xattr_user.c
index a6b60b153916..6464b6c3d58d 100644
--- a/fs/hfsplus/xattr_user.c
+++ b/fs/hfsplus/xattr_user.c
@@ -22,7 +22,7 @@ static int hfsplus_user_getxattr(const struct xattr_handler *handler,
}
static int hfsplus_user_setxattr(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *name, const void *buffer,
size_t size, int flags)
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 277468783fee..c18bb50c31b6 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -559,7 +559,7 @@ static int read_name(struct inode *ino, char *name)
return 0;
}
-static int hostfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int hostfs_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
struct inode *inode;
@@ -658,7 +658,7 @@ static int hostfs_unlink(struct inode *ino, struct dentry *dentry)
return err;
}
-static int hostfs_symlink(struct user_namespace *mnt_userns, struct inode *ino,
+static int hostfs_symlink(struct mnt_idmap *idmap, struct inode *ino,
struct dentry *dentry, const char *to)
{
char *file;
@@ -671,7 +671,7 @@ static int hostfs_symlink(struct user_namespace *mnt_userns, struct inode *ino,
return err;
}
-static int hostfs_mkdir(struct user_namespace *mnt_userns, struct inode *ino,
+static int hostfs_mkdir(struct mnt_idmap *idmap, struct inode *ino,
struct dentry *dentry, umode_t mode)
{
char *file;
@@ -696,7 +696,7 @@ static int hostfs_rmdir(struct inode *ino, struct dentry *dentry)
return err;
}
-static int hostfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+static int hostfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t dev)
{
struct inode *inode;
@@ -734,7 +734,7 @@ static int hostfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
return err;
}
-static int hostfs_rename2(struct user_namespace *mnt_userns,
+static int hostfs_rename2(struct mnt_idmap *idmap,
struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
@@ -763,7 +763,7 @@ static int hostfs_rename2(struct user_namespace *mnt_userns,
return err;
}
-static int hostfs_permission(struct user_namespace *mnt_userns,
+static int hostfs_permission(struct mnt_idmap *idmap,
struct inode *ino, int desired)
{
char *name;
@@ -786,11 +786,11 @@ static int hostfs_permission(struct user_namespace *mnt_userns,
err = access_file(name, r, w, x);
__putname(name);
if (!err)
- err = generic_permission(&init_user_ns, ino, desired);
+ err = generic_permission(&nop_mnt_idmap, ino, desired);
return err;
}
-static int hostfs_setattr(struct user_namespace *mnt_userns,
+static int hostfs_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
@@ -800,7 +800,7 @@ static int hostfs_setattr(struct user_namespace *mnt_userns,
int fd = HOSTFS_I(inode)->fd;
- err = setattr_prepare(&init_user_ns, dentry, attr);
+ err = setattr_prepare(&nop_mnt_idmap, dentry, attr);
if (err)
return err;
@@ -857,7 +857,7 @@ static int hostfs_setattr(struct user_namespace *mnt_userns,
attr->ia_size != i_size_read(inode))
truncate_setsize(inode, attr->ia_size);
- setattr_copy(&init_user_ns, inode, attr);
+ setattr_copy(&nop_mnt_idmap, inode, attr);
mark_inode_dirty(inode);
return 0;
}
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index 167ec6884642..f5a2476c47bf 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -280,7 +280,7 @@ void hpfs_init_inode(struct inode *);
void hpfs_read_inode(struct inode *);
void hpfs_write_inode(struct inode *);
void hpfs_write_inode_nolock(struct inode *);
-int hpfs_setattr(struct user_namespace *, struct dentry *, struct iattr *);
+int hpfs_setattr(struct mnt_idmap *, struct dentry *, struct iattr *);
void hpfs_write_if_changed(struct inode *);
void hpfs_evict_inode(struct inode *);
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 82208cc28ebd..e50e92a42432 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -257,7 +257,7 @@ void hpfs_write_inode_nolock(struct inode *i)
brelse(bh);
}
-int hpfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int hpfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
@@ -275,7 +275,7 @@ int hpfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size)
goto out_unlock;
- error = setattr_prepare(&init_user_ns, dentry, attr);
+ error = setattr_prepare(&nop_mnt_idmap, dentry, attr);
if (error)
goto out_unlock;
@@ -289,7 +289,7 @@ int hpfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
hpfs_truncate(inode);
}
- setattr_copy(&init_user_ns, inode, attr);
+ setattr_copy(&nop_mnt_idmap, inode, attr);
hpfs_write_inode(inode);
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 15fc63276caa..69fb40b2c99a 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -20,7 +20,7 @@ static void hpfs_update_directory_times(struct inode *dir)
hpfs_write_inode_nolock(dir);
}
-static int hpfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int hpfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
const unsigned char *name = dentry->d_name.name;
@@ -129,7 +129,7 @@ bail:
return err;
}
-static int hpfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int hpfs_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
const unsigned char *name = dentry->d_name.name;
@@ -217,7 +217,7 @@ bail:
return err;
}
-static int hpfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+static int hpfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t rdev)
{
const unsigned char *name = dentry->d_name.name;
@@ -292,7 +292,7 @@ bail:
return err;
}
-static int hpfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+static int hpfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *symlink)
{
const unsigned char *name = dentry->d_name.name;
@@ -512,7 +512,7 @@ const struct address_space_operations hpfs_symlink_aops = {
.read_folio = hpfs_symlink_read_folio
};
-static int hpfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+static int hpfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags)
{
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 790d2727141a..0ce1cc4c2add 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -898,7 +898,7 @@ out:
return error;
}
-static int hugetlbfs_setattr(struct user_namespace *mnt_userns,
+static int hugetlbfs_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
@@ -907,7 +907,7 @@ static int hugetlbfs_setattr(struct user_namespace *mnt_userns,
unsigned int ia_valid = attr->ia_valid;
struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
- error = setattr_prepare(&init_user_ns, dentry, attr);
+ error = setattr_prepare(&nop_mnt_idmap, dentry, attr);
if (error)
return error;
@@ -924,7 +924,7 @@ static int hugetlbfs_setattr(struct user_namespace *mnt_userns,
hugetlb_vmtruncate(inode, newsize);
}
- setattr_copy(&init_user_ns, inode, attr);
+ setattr_copy(&nop_mnt_idmap, inode, attr);
mark_inode_dirty(inode);
return 0;
}
@@ -980,7 +980,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
inode->i_ino = get_next_ino();
- inode_init_owner(&init_user_ns, inode, dir, mode);
+ inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
lockdep_set_class(&inode->i_mapping->i_mmap_rwsem,
&hugetlbfs_i_mmap_rwsem_key);
inode->i_mapping->a_ops = &hugetlbfs_aops;
@@ -1019,7 +1019,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
/*
* File creation. Allocate an inode, and we're done..
*/
-static int hugetlbfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+static int hugetlbfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t dev)
{
struct inode *inode;
@@ -1033,24 +1033,24 @@ static int hugetlbfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
return 0;
}
-static int hugetlbfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int hugetlbfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
- int retval = hugetlbfs_mknod(&init_user_ns, dir, dentry,
+ int retval = hugetlbfs_mknod(&nop_mnt_idmap, dir, dentry,
mode | S_IFDIR, 0);
if (!retval)
inc_nlink(dir);
return retval;
}
-static int hugetlbfs_create(struct user_namespace *mnt_userns,
+static int hugetlbfs_create(struct mnt_idmap *idmap,
struct inode *dir, struct dentry *dentry,
umode_t mode, bool excl)
{
- return hugetlbfs_mknod(&init_user_ns, dir, dentry, mode | S_IFREG, 0);
+ return hugetlbfs_mknod(&nop_mnt_idmap, dir, dentry, mode | S_IFREG, 0);
}
-static int hugetlbfs_tmpfile(struct user_namespace *mnt_userns,
+static int hugetlbfs_tmpfile(struct mnt_idmap *idmap,
struct inode *dir, struct file *file,
umode_t mode)
{
@@ -1064,7 +1064,7 @@ static int hugetlbfs_tmpfile(struct user_namespace *mnt_userns,
return finish_open_simple(file, 0);
}
-static int hugetlbfs_symlink(struct user_namespace *mnt_userns,
+static int hugetlbfs_symlink(struct mnt_idmap *idmap,
struct inode *dir, struct dentry *dentry,
const char *symname)
{
diff --git a/fs/init.c b/fs/init.c
index 5c36adaa9b44..9684406a8416 100644
--- a/fs/init.c
+++ b/fs/init.c
@@ -157,7 +157,7 @@ int __init init_mknod(const char *filename, umode_t mode, unsigned int dev)
mode &= ~current_umask();
error = security_path_mknod(&path, dentry, mode, dev);
if (!error)
- error = vfs_mknod(mnt_user_ns(path.mnt), path.dentry->d_inode,
+ error = vfs_mknod(mnt_idmap(path.mnt), path.dentry->d_inode,
dentry, mode, new_decode_dev(dev));
done_path_create(&path, dentry);
return error;
@@ -167,7 +167,7 @@ int __init init_link(const char *oldname, const char *newname)
{
struct dentry *new_dentry;
struct path old_path, new_path;
- struct user_namespace *mnt_userns;
+ struct mnt_idmap *idmap;
int error;
error = kern_path(oldname, 0, &old_path);
@@ -182,14 +182,14 @@ int __init init_link(const char *oldname, const char *newname)
error = -EXDEV;
if (old_path.mnt != new_path.mnt)
goto out_dput;
- mnt_userns = mnt_user_ns(new_path.mnt);
- error = may_linkat(mnt_userns, &old_path);
+ idmap = mnt_idmap(new_path.mnt);
+ error = may_linkat(idmap, &old_path);
if (unlikely(error))
goto out_dput;
error = security_path_link(old_path.dentry, &new_path, new_dentry);
if (error)
goto out_dput;
- error = vfs_link(old_path.dentry, mnt_userns, new_path.dentry->d_inode,
+ error = vfs_link(old_path.dentry, idmap, new_path.dentry->d_inode,
new_dentry, NULL);
out_dput:
done_path_create(&new_path, new_dentry);
@@ -209,7 +209,7 @@ int __init init_symlink(const char *oldname, const char *newname)
return PTR_ERR(dentry);
error = security_path_symlink(&path, dentry, oldname);
if (!error)
- error = vfs_symlink(mnt_user_ns(path.mnt), path.dentry->d_inode,
+ error = vfs_symlink(mnt_idmap(path.mnt), path.dentry->d_inode,
dentry, oldname);
done_path_create(&path, dentry);
return error;
@@ -233,7 +233,7 @@ int __init init_mkdir(const char *pathname, umode_t mode)
mode &= ~current_umask();
error = security_path_mkdir(&path, dentry, mode);
if (!error)
- error = vfs_mkdir(mnt_user_ns(path.mnt), path.dentry->d_inode,
+ error = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode,
dentry, mode);
done_path_create(&path, dentry);
return error;
diff --git a/fs/inode.c b/fs/inode.c
index f453eb58fd03..4558dc2f1355 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -5,6 +5,7 @@
*/
#include <linux/export.h>
#include <linux/fs.h>
+#include <linux/filelock.h>
#include <linux/mm.h>
#include <linux/backing-dev.h>
#include <linux/hash.h>
@@ -1893,7 +1894,7 @@ bool atime_needs_update(const struct path *path, struct inode *inode)
/* Atime updates will likely cause i_uid and i_gid to be written
* back improprely if their true value is unknown to the vfs.
*/
- if (HAS_UNMAPPED_ID(mnt_user_ns(mnt), inode))
+ if (HAS_UNMAPPED_ID(mnt_idmap(mnt), inode))
return false;
if (IS_NOATIME(inode))
@@ -1953,7 +1954,7 @@ EXPORT_SYMBOL(touch_atime);
* response to write or truncate. Return 0 if nothing has to be changed.
* Negative value on error (change should be denied).
*/
-int dentry_needs_remove_privs(struct user_namespace *mnt_userns,
+int dentry_needs_remove_privs(struct mnt_idmap *idmap,
struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
@@ -1963,7 +1964,7 @@ int dentry_needs_remove_privs(struct user_namespace *mnt_userns,
if (IS_NOSEC(inode))
return 0;
- mask = setattr_should_drop_suidgid(mnt_userns, inode);
+ mask = setattr_should_drop_suidgid(idmap, inode);
ret = security_inode_need_killpriv(dentry);
if (ret < 0)
return ret;
@@ -1972,7 +1973,7 @@ int dentry_needs_remove_privs(struct user_namespace *mnt_userns,
return mask;
}
-static int __remove_privs(struct user_namespace *mnt_userns,
+static int __remove_privs(struct mnt_idmap *idmap,
struct dentry *dentry, int kill)
{
struct iattr newattrs;
@@ -1982,7 +1983,7 @@ static int __remove_privs(struct user_namespace *mnt_userns,
* Note we call this on write, so notify_change will not
* encounter any conflicting delegations:
*/
- return notify_change(mnt_userns, dentry, &newattrs, NULL);
+ return notify_change(idmap, dentry, &newattrs, NULL);
}
static int __file_remove_privs(struct file *file, unsigned int flags)
@@ -1995,7 +1996,7 @@ static int __file_remove_privs(struct file *file, unsigned int flags)
if (IS_NOSEC(inode) || !S_ISREG(inode->i_mode))
return 0;
- kill = dentry_needs_remove_privs(file_mnt_user_ns(file), dentry);
+ kill = dentry_needs_remove_privs(file_mnt_idmap(file), dentry);
if (kill < 0)
return kill;
@@ -2003,7 +2004,7 @@ static int __file_remove_privs(struct file *file, unsigned int flags)
if (flags & IOCB_NOWAIT)
return -EAGAIN;
- error = __remove_privs(file_mnt_user_ns(file), dentry, kill);
+ error = __remove_privs(file_mnt_idmap(file), dentry, kill);
}
if (!error)
@@ -2279,21 +2280,21 @@ EXPORT_SYMBOL(init_special_inode);
/**
* inode_init_owner - Init uid,gid,mode for new inode according to posix standards
- * @mnt_userns: User namespace of the mount the inode was created from
+ * @idmap: idmap of the mount the inode was created from
* @inode: New inode
* @dir: Directory inode
* @mode: mode of the new inode
*
- * If the inode has been created through an idmapped mount the user namespace of
- * the vfsmount must be passed through @mnt_userns. This function will then take
- * care to map the inode according to @mnt_userns before checking permissions
+ * If the inode has been created through an idmapped mount the idmap of
+ * the vfsmount must be passed through @idmap. This function will then take
+ * care to map the inode according to @idmap before checking permissions
* and initializing i_uid and i_gid. On non-idmapped mounts or if permission
- * checking is to be performed on the raw inode simply passs init_user_ns.
+ * checking is to be performed on the raw inode simply pass @nop_mnt_idmap.
*/
-void inode_init_owner(struct user_namespace *mnt_userns, struct inode *inode,
+void inode_init_owner(struct mnt_idmap *idmap, struct inode *inode,
const struct inode *dir, umode_t mode)
{
- inode_fsuid_set(inode, mnt_userns);
+ inode_fsuid_set(inode, idmap);
if (dir && dir->i_mode & S_ISGID) {
inode->i_gid = dir->i_gid;
@@ -2301,32 +2302,32 @@ void inode_init_owner(struct user_namespace *mnt_userns, struct inode *inode,
if (S_ISDIR(mode))
mode |= S_ISGID;
} else
- inode_fsgid_set(inode, mnt_userns);
+ inode_fsgid_set(inode, idmap);
inode->i_mode = mode;
}
EXPORT_SYMBOL(inode_init_owner);
/**
* inode_owner_or_capable - check current task permissions to inode
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @inode: inode being checked
*
* Return true if current either has CAP_FOWNER in a namespace with the
* inode owner uid mapped, or owns the file.
*
- * If the inode has been found through an idmapped mount the user namespace of
- * the vfsmount must be passed through @mnt_userns. This function will then take
- * care to map the inode according to @mnt_userns before checking permissions.
+ * If the inode has been found through an idmapped mount the idmap of
+ * the vfsmount must be passed through @idmap. This function will then take
+ * care to map the inode according to @idmap before checking permissions.
* On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs init_user_ns.
+ * raw inode simply passs @nop_mnt_idmap.
*/
-bool inode_owner_or_capable(struct user_namespace *mnt_userns,
+bool inode_owner_or_capable(struct mnt_idmap *idmap,
const struct inode *inode)
{
vfsuid_t vfsuid;
struct user_namespace *ns;
- vfsuid = i_uid_into_vfsuid(mnt_userns, inode);
+ vfsuid = i_uid_into_vfsuid(idmap, inode);
if (vfsuid_eq_kuid(vfsuid, current_fsuid()))
return true;
@@ -2458,7 +2459,7 @@ EXPORT_SYMBOL(current_time);
/**
* in_group_or_capable - check whether caller is CAP_FSETID privileged
- * @mnt_userns: user namespace of the mount @inode was found from
+ * @idmap: idmap of the mount @inode was found from
* @inode: inode to check
* @vfsgid: the new/current vfsgid of @inode
*
@@ -2468,19 +2469,19 @@ EXPORT_SYMBOL(current_time);
*
* Return: true if the caller is sufficiently privileged, false if not.
*/
-bool in_group_or_capable(struct user_namespace *mnt_userns,
+bool in_group_or_capable(struct mnt_idmap *idmap,
const struct inode *inode, vfsgid_t vfsgid)
{
if (vfsgid_in_group_p(vfsgid))
return true;
- if (capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
+ if (capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID))
return true;
return false;
}
/**
* mode_strip_sgid - handle the sgid bit for non-directories
- * @mnt_userns: User namespace of the mount the inode was created from
+ * @idmap: idmap of the mount the inode was created from
* @dir: parent directory inode
* @mode: mode of the file to be created in @dir
*
@@ -2492,15 +2493,14 @@ bool in_group_or_capable(struct user_namespace *mnt_userns,
*
* Return: the new mode to use for the file
*/
-umode_t mode_strip_sgid(struct user_namespace *mnt_userns,
+umode_t mode_strip_sgid(struct mnt_idmap *idmap,
const struct inode *dir, umode_t mode)
{
if ((mode & (S_ISGID | S_IXGRP)) != (S_ISGID | S_IXGRP))
return mode;
if (S_ISDIR(mode) || !dir || !(dir->i_mode & S_ISGID))
return mode;
- if (in_group_or_capable(mnt_userns, dir,
- i_gid_into_vfsgid(mnt_userns, dir)))
+ if (in_group_or_capable(idmap, dir, i_gid_into_vfsgid(idmap, dir)))
return mode;
return mode & ~S_ISGID;
}
diff --git a/fs/internal.h b/fs/internal.h
index a803cc3cf716..766e8a554b2c 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -14,9 +14,9 @@ struct path;
struct mount;
struct shrink_control;
struct fs_context;
-struct user_namespace;
struct pipe_inode_info;
struct iov_iter;
+struct mnt_idmap;
/*
* block/bdev.c
@@ -63,7 +63,7 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
const char *, unsigned int, struct path *);
int do_rmdir(int dfd, struct filename *name);
int do_unlinkat(int dfd, struct filename *name);
-int may_linkat(struct user_namespace *mnt_userns, const struct path *link);
+int may_linkat(struct mnt_idmap *idmap, const struct path *link);
int do_renameat2(int olddfd, struct filename *oldname, int newdfd,
struct filename *newname, unsigned int flags);
int do_mkdirat(int dfd, struct filename *name, umode_t mode);
@@ -150,8 +150,8 @@ extern int vfs_open(const struct path *, struct file *);
* inode.c
*/
extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc);
-int dentry_needs_remove_privs(struct user_namespace *, struct dentry *dentry);
-bool in_group_or_capable(struct user_namespace *mnt_userns,
+int dentry_needs_remove_privs(struct mnt_idmap *, struct dentry *dentry);
+bool in_group_or_capable(struct mnt_idmap *idmap,
const struct inode *inode, vfsgid_t vfsgid);
/*
@@ -234,7 +234,7 @@ ssize_t do_getxattr(struct mnt_idmap *idmap,
int setxattr_copy(const char __user *name, struct xattr_ctx *ctx);
int do_setxattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct xattr_ctx *ctx);
-int may_write_xattr(struct user_namespace *mnt_userns, struct inode *inode);
+int may_write_xattr(struct mnt_idmap *idmap, struct inode *inode);
#ifdef CONFIG_FS_POSIX_ACL
int do_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
@@ -261,5 +261,8 @@ ssize_t __kernel_write_iter(struct file *file, struct iov_iter *from, loff_t *po
/*
* fs/attr.c
*/
-int setattr_should_drop_sgid(struct user_namespace *mnt_userns,
+int setattr_should_drop_sgid(struct mnt_idmap *idmap,
const struct inode *inode);
+struct mnt_idmap *alloc_mnt_idmap(struct user_namespace *mnt_userns);
+struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap);
+void mnt_idmap_put(struct mnt_idmap *idmap);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 80ac36aea913..5b2481cd4750 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -651,7 +651,7 @@ static int fileattr_set_prepare(struct inode *inode,
/**
* vfs_fileattr_set - change miscellaneous file attributes
- * @mnt_userns: user namespace of the mount
+ * @idmap: idmap of the mount
* @dentry: the object to change
* @fa: fileattr pointer
*
@@ -665,7 +665,7 @@ static int fileattr_set_prepare(struct inode *inode,
*
* Return: 0 on success, or a negative error on failure.
*/
-int vfs_fileattr_set(struct user_namespace *mnt_userns, struct dentry *dentry,
+int vfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry,
struct fileattr *fa)
{
struct inode *inode = d_inode(dentry);
@@ -675,7 +675,7 @@ int vfs_fileattr_set(struct user_namespace *mnt_userns, struct dentry *dentry,
if (!inode->i_op->fileattr_set)
return -ENOIOCTLCMD;
- if (!inode_owner_or_capable(mnt_userns, inode))
+ if (!inode_owner_or_capable(idmap, inode))
return -EPERM;
inode_lock(inode);
@@ -693,7 +693,7 @@ int vfs_fileattr_set(struct user_namespace *mnt_userns, struct dentry *dentry,
}
err = fileattr_set_prepare(inode, &old_ma, fa);
if (!err)
- err = inode->i_op->fileattr_set(mnt_userns, dentry, fa);
+ err = inode->i_op->fileattr_set(idmap, dentry, fa);
}
inode_unlock(inode);
@@ -714,7 +714,7 @@ static int ioctl_getflags(struct file *file, unsigned int __user *argp)
static int ioctl_setflags(struct file *file, unsigned int __user *argp)
{
- struct user_namespace *mnt_userns = file_mnt_user_ns(file);
+ struct mnt_idmap *idmap = file_mnt_idmap(file);
struct dentry *dentry = file->f_path.dentry;
struct fileattr fa;
unsigned int flags;
@@ -725,7 +725,7 @@ static int ioctl_setflags(struct file *file, unsigned int __user *argp)
err = mnt_want_write_file(file);
if (!err) {
fileattr_fill_flags(&fa, flags);
- err = vfs_fileattr_set(mnt_userns, dentry, &fa);
+ err = vfs_fileattr_set(idmap, dentry, &fa);
mnt_drop_write_file(file);
}
}
@@ -746,7 +746,7 @@ static int ioctl_fsgetxattr(struct file *file, void __user *argp)
static int ioctl_fssetxattr(struct file *file, void __user *argp)
{
- struct user_namespace *mnt_userns = file_mnt_user_ns(file);
+ struct mnt_idmap *idmap = file_mnt_idmap(file);
struct dentry *dentry = file->f_path.dentry;
struct fileattr fa;
int err;
@@ -755,7 +755,7 @@ static int ioctl_fssetxattr(struct file *file, void __user *argp)
if (!err) {
err = mnt_want_write_file(file);
if (!err) {
- err = vfs_fileattr_set(mnt_userns, dentry, &fa);
+ err = vfs_fileattr_set(idmap, dentry, &fa);
mnt_drop_write_file(file);
}
}
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 9804714b1751..f771001574d0 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -217,16 +217,10 @@ static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio,
{
blk_opf_t opflags = REQ_SYNC | REQ_IDLE;
- if (!(dio->flags & IOMAP_DIO_WRITE)) {
- WARN_ON_ONCE(iomap->flags & IOMAP_F_ZONE_APPEND);
+ if (!(dio->flags & IOMAP_DIO_WRITE))
return REQ_OP_READ;
- }
-
- if (iomap->flags & IOMAP_F_ZONE_APPEND)
- opflags |= REQ_OP_ZONE_APPEND;
- else
- opflags |= REQ_OP_WRITE;
+ opflags |= REQ_OP_WRITE;
if (use_fua)
opflags |= REQ_FUA;
else
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 8bb58ce5c06c..888a7ceb6479 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -229,7 +229,7 @@ static int __jffs2_set_acl(struct inode *inode, int xprefix, struct posix_acl *a
return rc;
}
-int jffs2_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+int jffs2_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type)
{
int rc, xprefix;
@@ -241,7 +241,7 @@ int jffs2_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
if (acl) {
umode_t mode;
- rc = posix_acl_update_mode(&init_user_ns, inode, &mode,
+ rc = posix_acl_update_mode(&nop_mnt_idmap, inode, &mode,
&acl);
if (rc)
return rc;
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index ca36a6eca594..e976b8cb82cf 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -28,7 +28,7 @@ struct jffs2_acl_header {
#ifdef CONFIG_JFFS2_FS_POSIX_ACL
struct posix_acl *jffs2_get_acl(struct inode *inode, int type, bool rcu);
-int jffs2_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+int jffs2_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type);
extern int jffs2_init_acl_pre(struct inode *, struct inode *, umode_t *);
extern int jffs2_init_acl_post(struct inode *);
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index f399b390b5f6..5075a0a6d594 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -24,20 +24,20 @@
static int jffs2_readdir (struct file *, struct dir_context *);
-static int jffs2_create (struct user_namespace *, struct inode *,
+static int jffs2_create (struct mnt_idmap *, struct inode *,
struct dentry *, umode_t, bool);
static struct dentry *jffs2_lookup (struct inode *,struct dentry *,
unsigned int);
static int jffs2_link (struct dentry *,struct inode *,struct dentry *);
static int jffs2_unlink (struct inode *,struct dentry *);
-static int jffs2_symlink (struct user_namespace *, struct inode *,
+static int jffs2_symlink (struct mnt_idmap *, struct inode *,
struct dentry *, const char *);
-static int jffs2_mkdir (struct user_namespace *, struct inode *,struct dentry *,
+static int jffs2_mkdir (struct mnt_idmap *, struct inode *,struct dentry *,
umode_t);
static int jffs2_rmdir (struct inode *,struct dentry *);
-static int jffs2_mknod (struct user_namespace *, struct inode *,struct dentry *,
+static int jffs2_mknod (struct mnt_idmap *, struct inode *,struct dentry *,
umode_t,dev_t);
-static int jffs2_rename (struct user_namespace *, struct inode *,
+static int jffs2_rename (struct mnt_idmap *, struct inode *,
struct dentry *, struct inode *, struct dentry *,
unsigned int);
@@ -160,7 +160,7 @@ static int jffs2_readdir(struct file *file, struct dir_context *ctx)
/***********************************************************************/
-static int jffs2_create(struct user_namespace *mnt_userns, struct inode *dir_i,
+static int jffs2_create(struct mnt_idmap *idmap, struct inode *dir_i,
struct dentry *dentry, umode_t mode, bool excl)
{
struct jffs2_raw_inode *ri;
@@ -279,7 +279,7 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de
/***********************************************************************/
-static int jffs2_symlink (struct user_namespace *mnt_userns, struct inode *dir_i,
+static int jffs2_symlink (struct mnt_idmap *idmap, struct inode *dir_i,
struct dentry *dentry, const char *target)
{
struct jffs2_inode_info *f, *dir_f;
@@ -442,7 +442,7 @@ static int jffs2_symlink (struct user_namespace *mnt_userns, struct inode *dir_i
}
-static int jffs2_mkdir (struct user_namespace *mnt_userns, struct inode *dir_i,
+static int jffs2_mkdir (struct mnt_idmap *idmap, struct inode *dir_i,
struct dentry *dentry, umode_t mode)
{
struct jffs2_inode_info *f, *dir_f;
@@ -614,7 +614,7 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry)
return ret;
}
-static int jffs2_mknod (struct user_namespace *mnt_userns, struct inode *dir_i,
+static int jffs2_mknod (struct mnt_idmap *idmap, struct inode *dir_i,
struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct jffs2_inode_info *f, *dir_f;
@@ -762,7 +762,7 @@ static int jffs2_mknod (struct user_namespace *mnt_userns, struct inode *dir_i,
return ret;
}
-static int jffs2_rename (struct user_namespace *mnt_userns,
+static int jffs2_rename (struct mnt_idmap *idmap,
struct inode *old_dir_i, struct dentry *old_dentry,
struct inode *new_dir_i, struct dentry *new_dentry,
unsigned int flags)
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 66af51c41619..09174898efd0 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -190,19 +190,19 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
return 0;
}
-int jffs2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int jffs2_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *iattr)
{
struct inode *inode = d_inode(dentry);
int rc;
- rc = setattr_prepare(&init_user_ns, dentry, iattr);
+ rc = setattr_prepare(&nop_mnt_idmap, dentry, iattr);
if (rc)
return rc;
rc = jffs2_do_setattr(inode, iattr);
if (!rc && (iattr->ia_valid & ATTR_MODE))
- rc = posix_acl_chmod(&init_user_ns, dentry, inode->i_mode);
+ rc = posix_acl_chmod(&nop_mnt_idmap, dentry, inode->i_mode);
return rc;
}
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 921d782583d6..8da19766c101 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -164,7 +164,7 @@ long jffs2_ioctl(struct file *, unsigned int, unsigned long);
extern const struct inode_operations jffs2_symlink_inode_operations;
/* fs.c */
-int jffs2_setattr (struct user_namespace *, struct dentry *, struct iattr *);
+int jffs2_setattr (struct mnt_idmap *, struct dentry *, struct iattr *);
int jffs2_do_setattr (struct inode *, struct iattr *);
struct inode *jffs2_iget(struct super_block *, unsigned long);
void jffs2_evict_inode (struct inode *);
diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c
index aef5522551db..437f3a2c1b54 100644
--- a/fs/jffs2/security.c
+++ b/fs/jffs2/security.c
@@ -57,7 +57,7 @@ static int jffs2_security_getxattr(const struct xattr_handler *handler,
}
static int jffs2_security_setxattr(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *name, const void *buffer,
size_t size, int flags)
diff --git a/fs/jffs2/xattr_trusted.c b/fs/jffs2/xattr_trusted.c
index cc3f24883e7d..b7c5da2d89bd 100644
--- a/fs/jffs2/xattr_trusted.c
+++ b/fs/jffs2/xattr_trusted.c
@@ -25,7 +25,7 @@ static int jffs2_trusted_getxattr(const struct xattr_handler *handler,
}
static int jffs2_trusted_setxattr(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *name, const void *buffer,
size_t size, int flags)
diff --git a/fs/jffs2/xattr_user.c b/fs/jffs2/xattr_user.c
index fb945977c013..f64edce4927b 100644
--- a/fs/jffs2/xattr_user.c
+++ b/fs/jffs2/xattr_user.c
@@ -25,7 +25,7 @@ static int jffs2_user_getxattr(const struct xattr_handler *handler,
}
static int jffs2_user_setxattr(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *name, const void *buffer,
size_t size, int flags)
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 3b667eccc73b..fb96f872d207 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -94,7 +94,7 @@ out:
return rc;
}
-int jfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+int jfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type)
{
int rc;
@@ -106,7 +106,7 @@ int jfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
tid = txBegin(inode->i_sb, 0);
mutex_lock(&JFS_IP(inode)->commit_mutex);
if (type == ACL_TYPE_ACCESS && acl) {
- rc = posix_acl_update_mode(&init_user_ns, inode, &mode, &acl);
+ rc = posix_acl_update_mode(&nop_mnt_idmap, inode, &mode, &acl);
if (rc)
goto end_tx;
if (mode != inode->i_mode)
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 88663465aecd..2ee35be49de1 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -85,24 +85,24 @@ static int jfs_release(struct inode *inode, struct file *file)
return 0;
}
-int jfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int jfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *iattr)
{
struct inode *inode = d_inode(dentry);
int rc;
- rc = setattr_prepare(&init_user_ns, dentry, iattr);
+ rc = setattr_prepare(&nop_mnt_idmap, dentry, iattr);
if (rc)
return rc;
- if (is_quota_modification(mnt_userns, inode, iattr)) {
+ if (is_quota_modification(&nop_mnt_idmap, inode, iattr)) {
rc = dquot_initialize(inode);
if (rc)
return rc;
}
if ((iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) ||
(iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))) {
- rc = dquot_transfer(mnt_userns, inode, iattr);
+ rc = dquot_transfer(&nop_mnt_idmap, inode, iattr);
if (rc)
return rc;
}
@@ -119,11 +119,11 @@ int jfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
jfs_truncate(inode);
}
- setattr_copy(&init_user_ns, inode, iattr);
+ setattr_copy(&nop_mnt_idmap, inode, iattr);
mark_inode_dirty(inode);
if (iattr->ia_valid & ATTR_MODE)
- rc = posix_acl_chmod(&init_user_ns, dentry, inode->i_mode);
+ rc = posix_acl_chmod(&nop_mnt_idmap, dentry, inode->i_mode);
return rc;
}
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index 1e7b177ece60..ed7989bc2db1 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -70,7 +70,7 @@ int jfs_fileattr_get(struct dentry *dentry, struct fileattr *fa)
return 0;
}
-int jfs_fileattr_set(struct user_namespace *mnt_userns,
+int jfs_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa)
{
struct inode *inode = d_inode(dentry);
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index f0704a25835f..f892e54d0fcd 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -8,7 +8,7 @@
#ifdef CONFIG_JFS_POSIX_ACL
struct posix_acl *jfs_get_acl(struct inode *inode, int type, bool rcu);
-int jfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+int jfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type);
int jfs_init_acl(tid_t, struct inode *, struct inode *);
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index 59379089e939..9e1f02767201 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -64,7 +64,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
goto fail_put;
}
- inode_init_owner(&init_user_ns, inode, parent, mode);
+ inode_init_owner(&nop_mnt_idmap, inode, parent, mode);
/*
* New inodes need to save sane values on disk when
* uid & gid mount options are used
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index 7de961a81862..ea80661597ac 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -10,7 +10,7 @@ struct fid;
extern struct inode *ialloc(struct inode *, umode_t);
extern int jfs_fsync(struct file *, loff_t, loff_t, int);
extern int jfs_fileattr_get(struct dentry *dentry, struct fileattr *fa);
-extern int jfs_fileattr_set(struct user_namespace *mnt_userns,
+extern int jfs_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa);
extern long jfs_ioctl(struct file *, unsigned int, unsigned long);
extern struct inode *jfs_iget(struct super_block *, unsigned long);
@@ -28,7 +28,7 @@ extern struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid,
int fh_len, int fh_type);
extern void jfs_set_inode_flags(struct inode *);
extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
-extern int jfs_setattr(struct user_namespace *, struct dentry *, struct iattr *);
+extern int jfs_setattr(struct mnt_idmap *, struct dentry *, struct iattr *);
extern const struct address_space_operations jfs_aops;
extern const struct inode_operations jfs_dir_inode_operations;
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index a38d14eed047..b29d68b5eec5 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -59,7 +59,7 @@ static inline void free_ea_wmap(struct inode *inode)
* RETURN: Errors from subroutines
*
*/
-static int jfs_create(struct user_namespace *mnt_userns, struct inode *dip,
+static int jfs_create(struct mnt_idmap *idmap, struct inode *dip,
struct dentry *dentry, umode_t mode, bool excl)
{
int rc = 0;
@@ -192,7 +192,7 @@ static int jfs_create(struct user_namespace *mnt_userns, struct inode *dip,
* note:
* EACCES: user needs search+write permission on the parent directory
*/
-static int jfs_mkdir(struct user_namespace *mnt_userns, struct inode *dip,
+static int jfs_mkdir(struct mnt_idmap *idmap, struct inode *dip,
struct dentry *dentry, umode_t mode)
{
int rc = 0;
@@ -869,7 +869,7 @@ static int jfs_link(struct dentry *old_dentry,
* an intermediate result whose length exceeds PATH_MAX [XPG4.2]
*/
-static int jfs_symlink(struct user_namespace *mnt_userns, struct inode *dip,
+static int jfs_symlink(struct mnt_idmap *idmap, struct inode *dip,
struct dentry *dentry, const char *name)
{
int rc;
@@ -1059,7 +1059,7 @@ static int jfs_symlink(struct user_namespace *mnt_userns, struct inode *dip,
*
* FUNCTION: rename a file or directory
*/
-static int jfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+static int jfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags)
{
@@ -1345,7 +1345,7 @@ static int jfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
*
* FUNCTION: Create a special file (device)
*/
-static int jfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+static int jfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct jfs_inode_info *jfs_ip;
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index f9273f6901c8..f817798fa1eb 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -932,7 +932,7 @@ static int jfs_xattr_get(const struct xattr_handler *handler,
}
static int jfs_xattr_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
@@ -951,7 +951,7 @@ static int jfs_xattr_get_os2(const struct xattr_handler *handler,
}
static int jfs_xattr_set_os2(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 935ef8cb02b2..e3181c3e1988 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -1200,7 +1200,7 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir,
return d_splice_alias(inode, dentry);
}
-static int kernfs_iop_mkdir(struct user_namespace *mnt_userns,
+static int kernfs_iop_mkdir(struct mnt_idmap *idmap,
struct inode *dir, struct dentry *dentry,
umode_t mode)
{
@@ -1238,7 +1238,7 @@ static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry)
return ret;
}
-static int kernfs_iop_rename(struct user_namespace *mnt_userns,
+static int kernfs_iop_rename(struct mnt_idmap *idmap,
struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index eac0f210299a..30494dcb0df3 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -107,7 +107,7 @@ int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
return ret;
}
-int kernfs_iop_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int kernfs_iop_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *iattr)
{
struct inode *inode = d_inode(dentry);
@@ -120,7 +120,7 @@ int kernfs_iop_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
root = kernfs_root(kn);
down_write(&root->kernfs_rwsem);
- error = setattr_prepare(&init_user_ns, dentry, iattr);
+ error = setattr_prepare(&nop_mnt_idmap, dentry, iattr);
if (error)
goto out;
@@ -129,7 +129,7 @@ int kernfs_iop_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
goto out;
/* this ignores size changes */
- setattr_copy(&init_user_ns, inode, iattr);
+ setattr_copy(&nop_mnt_idmap, inode, iattr);
out:
up_write(&root->kernfs_rwsem);
@@ -181,7 +181,7 @@ static void kernfs_refresh_inode(struct kernfs_node *kn, struct inode *inode)
set_nlink(inode, kn->dir.subdirs + 2);
}
-int kernfs_iop_getattr(struct user_namespace *mnt_userns,
+int kernfs_iop_getattr(struct mnt_idmap *idmap,
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
@@ -191,7 +191,7 @@ int kernfs_iop_getattr(struct user_namespace *mnt_userns,
down_read(&root->kernfs_rwsem);
kernfs_refresh_inode(kn, inode);
- generic_fillattr(&init_user_ns, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, inode, stat);
up_read(&root->kernfs_rwsem);
return 0;
@@ -272,7 +272,7 @@ void kernfs_evict_inode(struct inode *inode)
kernfs_put(kn);
}
-int kernfs_iop_permission(struct user_namespace *mnt_userns,
+int kernfs_iop_permission(struct mnt_idmap *idmap,
struct inode *inode, int mask)
{
struct kernfs_node *kn;
@@ -287,7 +287,7 @@ int kernfs_iop_permission(struct user_namespace *mnt_userns,
down_read(&root->kernfs_rwsem);
kernfs_refresh_inode(kn, inode);
- ret = generic_permission(&init_user_ns, inode, mask);
+ ret = generic_permission(&nop_mnt_idmap, inode, mask);
up_read(&root->kernfs_rwsem);
return ret;
@@ -324,7 +324,7 @@ static int kernfs_vfs_xattr_get(const struct xattr_handler *handler,
}
static int kernfs_vfs_xattr_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *suffix, const void *value,
size_t size, int flags)
@@ -391,7 +391,7 @@ static int kernfs_vfs_user_xattr_rm(struct kernfs_node *kn,
}
static int kernfs_vfs_user_xattr_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *suffix, const void *value,
size_t size, int flags)
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index 9046d9f39e63..236c3a6113f1 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -127,11 +127,11 @@ extern struct kmem_cache *kernfs_node_cache, *kernfs_iattrs_cache;
*/
extern const struct xattr_handler *kernfs_xattr_handlers[];
void kernfs_evict_inode(struct inode *inode);
-int kernfs_iop_permission(struct user_namespace *mnt_userns,
+int kernfs_iop_permission(struct mnt_idmap *idmap,
struct inode *inode, int mask);
-int kernfs_iop_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int kernfs_iop_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *iattr);
-int kernfs_iop_getattr(struct user_namespace *mnt_userns,
+int kernfs_iop_getattr(struct mnt_idmap *idmap,
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags);
ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size);
diff --git a/fs/ksmbd/auth.c b/fs/ksmbd/auth.c
index 2a39ffb8423b..6e61b5bc7d86 100644
--- a/fs/ksmbd/auth.c
+++ b/fs/ksmbd/auth.c
@@ -322,7 +322,8 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
dn_off = le32_to_cpu(authblob->DomainName.BufferOffset);
dn_len = le16_to_cpu(authblob->DomainName.Length);
- if (blob_len < (u64)dn_off + dn_len || blob_len < (u64)nt_off + nt_len)
+ if (blob_len < (u64)dn_off + dn_len || blob_len < (u64)nt_off + nt_len ||
+ nt_len < CIFS_ENCPWD_SIZE)
return -EINVAL;
/* TODO : use domain name that imported from configuration file */
diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c
index 12be8386446a..56be077e5d8a 100644
--- a/fs/ksmbd/connection.c
+++ b/fs/ksmbd/connection.c
@@ -280,7 +280,7 @@ int ksmbd_conn_handler_loop(void *p)
{
struct ksmbd_conn *conn = (struct ksmbd_conn *)p;
struct ksmbd_transport *t = conn->transport;
- unsigned int pdu_size;
+ unsigned int pdu_size, max_allowed_pdu_size;
char hdr_buf[4] = {0,};
int size;
@@ -305,20 +305,36 @@ int ksmbd_conn_handler_loop(void *p)
pdu_size = get_rfc1002_len(hdr_buf);
ksmbd_debug(CONN, "RFC1002 header %u bytes\n", pdu_size);
+ if (conn->status == KSMBD_SESS_GOOD)
+ max_allowed_pdu_size =
+ SMB3_MAX_MSGSIZE + conn->vals->max_write_size;
+ else
+ max_allowed_pdu_size = SMB3_MAX_MSGSIZE;
+
+ if (pdu_size > max_allowed_pdu_size) {
+ pr_err_ratelimited("PDU length(%u) excceed maximum allowed pdu size(%u) on connection(%d)\n",
+ pdu_size, max_allowed_pdu_size,
+ conn->status);
+ break;
+ }
+
/*
* Check if pdu size is valid (min : smb header size,
* max : 0x00FFFFFF).
*/
if (pdu_size < __SMB2_HEADER_STRUCTURE_SIZE ||
pdu_size > MAX_STREAM_PROT_LEN) {
- continue;
+ break;
}
/* 4 for rfc1002 length field */
size = pdu_size + 4;
- conn->request_buf = kvmalloc(size, GFP_KERNEL);
+ conn->request_buf = kvmalloc(size,
+ GFP_KERNEL |
+ __GFP_NOWARN |
+ __GFP_NORETRY);
if (!conn->request_buf)
- continue;
+ break;
memcpy(conn->request_buf, hdr_buf, sizeof(hdr_buf));
if (!ksmbd_smb_request(conn))
diff --git a/fs/ksmbd/ksmbd_netlink.h b/fs/ksmbd/ksmbd_netlink.h
index b6bd8311e6b4..fb8b2d566efb 100644
--- a/fs/ksmbd/ksmbd_netlink.h
+++ b/fs/ksmbd/ksmbd_netlink.h
@@ -106,7 +106,8 @@ struct ksmbd_startup_request {
__u32 sub_auth[3]; /* Subauth value for Security ID */
__u32 smb2_max_credits; /* MAX credits */
__u32 smbd_max_io_size; /* smbd read write size */
- __u32 reserved[127]; /* Reserved room */
+ __u32 max_connections; /* Number of maximum simultaneous connections */
+ __u32 reserved[126]; /* Reserved room */
__u32 ifc_list_sz; /* interfaces list size */
__s8 ____payload[];
};
diff --git a/fs/ksmbd/ndr.c b/fs/ksmbd/ndr.c
index 0ae8d08d85a8..3507d8f89074 100644
--- a/fs/ksmbd/ndr.c
+++ b/fs/ksmbd/ndr.c
@@ -242,7 +242,7 @@ int ndr_decode_dos_attr(struct ndr *n, struct xattr_dos_attrib *da)
return ret;
if (da->version != 3 && da->version != 4) {
- pr_err("v%d version is not supported\n", da->version);
+ ksmbd_debug(VFS, "v%d version is not supported\n", da->version);
return -EINVAL;
}
@@ -251,7 +251,7 @@ int ndr_decode_dos_attr(struct ndr *n, struct xattr_dos_attrib *da)
return ret;
if (da->version != version2) {
- pr_err("ndr version mismatched(version: %d, version2: %d)\n",
+ ksmbd_debug(VFS, "ndr version mismatched(version: %d, version2: %d)\n",
da->version, version2);
return -EINVAL;
}
@@ -338,7 +338,7 @@ static int ndr_encode_posix_acl_entry(struct ndr *n, struct xattr_smb_acl *acl)
}
int ndr_encode_posix_acl(struct ndr *n,
- struct user_namespace *user_ns,
+ struct mnt_idmap *idmap,
struct inode *inode,
struct xattr_smb_acl *acl,
struct xattr_smb_acl *def_acl)
@@ -374,11 +374,11 @@ int ndr_encode_posix_acl(struct ndr *n,
if (ret)
return ret;
- vfsuid = i_uid_into_vfsuid(user_ns, inode);
+ vfsuid = i_uid_into_vfsuid(idmap, inode);
ret = ndr_write_int64(n, from_kuid(&init_user_ns, vfsuid_into_kuid(vfsuid)));
if (ret)
return ret;
- vfsgid = i_gid_into_vfsgid(user_ns, inode);
+ vfsgid = i_gid_into_vfsgid(idmap, inode);
ret = ndr_write_int64(n, from_kgid(&init_user_ns, vfsgid_into_kgid(vfsgid)));
if (ret)
return ret;
@@ -457,7 +457,7 @@ int ndr_decode_v4_ntacl(struct ndr *n, struct xattr_ntacl *acl)
if (ret)
return ret;
if (acl->version != 4) {
- pr_err("v%d version is not supported\n", acl->version);
+ ksmbd_debug(VFS, "v%d version is not supported\n", acl->version);
return -EINVAL;
}
@@ -465,7 +465,7 @@ int ndr_decode_v4_ntacl(struct ndr *n, struct xattr_ntacl *acl)
if (ret)
return ret;
if (acl->version != version2) {
- pr_err("ndr version mismatched(version: %d, version2: %d)\n",
+ ksmbd_debug(VFS, "ndr version mismatched(version: %d, version2: %d)\n",
acl->version, version2);
return -EINVAL;
}
diff --git a/fs/ksmbd/ndr.h b/fs/ksmbd/ndr.h
index 60ca265d1bb0..f3c108c8cf4d 100644
--- a/fs/ksmbd/ndr.h
+++ b/fs/ksmbd/ndr.h
@@ -14,7 +14,7 @@ struct ndr {
int ndr_encode_dos_attr(struct ndr *n, struct xattr_dos_attrib *da);
int ndr_decode_dos_attr(struct ndr *n, struct xattr_dos_attrib *da);
-int ndr_encode_posix_acl(struct ndr *n, struct user_namespace *user_ns,
+int ndr_encode_posix_acl(struct ndr *n, struct mnt_idmap *idmap,
struct inode *inode, struct xattr_smb_acl *acl,
struct xattr_smb_acl *def_acl);
int ndr_encode_v4_ntacl(struct ndr *n, struct xattr_ntacl *acl);
diff --git a/fs/ksmbd/oplock.c b/fs/ksmbd/oplock.c
index d7d47b82451d..2e54ded4d92c 100644
--- a/fs/ksmbd/oplock.c
+++ b/fs/ksmbd/oplock.c
@@ -1608,9 +1608,9 @@ void create_posix_rsp_buf(char *cc, struct ksmbd_file *fp)
{
struct create_posix_rsp *buf;
struct inode *inode = file_inode(fp->filp);
- struct user_namespace *user_ns = file_mnt_user_ns(fp->filp);
- vfsuid_t vfsuid = i_uid_into_vfsuid(user_ns, inode);
- vfsgid_t vfsgid = i_gid_into_vfsgid(user_ns, inode);
+ struct mnt_idmap *idmap = file_mnt_idmap(fp->filp);
+ vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode);
+ vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode);
buf = (struct create_posix_rsp *)cc;
memset(buf, 0, sizeof(struct create_posix_rsp));
diff --git a/fs/ksmbd/server.h b/fs/ksmbd/server.h
index ac9d932f8c8a..db7278181760 100644
--- a/fs/ksmbd/server.h
+++ b/fs/ksmbd/server.h
@@ -41,6 +41,7 @@ struct ksmbd_server_config {
unsigned int share_fake_fscaps;
struct smb_sid domain_sid;
unsigned int auth_mechs;
+ unsigned int max_connections;
char *conf[SERVER_CONF_WORK_GROUP + 1];
};
diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
index 14d7f3599c63..4ef6e1e59a40 100644
--- a/fs/ksmbd/smb2pdu.c
+++ b/fs/ksmbd/smb2pdu.c
@@ -12,6 +12,7 @@
#include <linux/ethtool.h>
#include <linux/falloc.h>
#include <linux/mount.h>
+#include <linux/filelock.h>
#include "glob.h"
#include "smbfsctl.h"
@@ -1928,13 +1929,13 @@ int smb2_tree_connect(struct ksmbd_work *work)
if (conn->posix_ext_supported)
status.tree_conn->posix_extensions = true;
-out_err1:
rsp->StructureSize = cpu_to_le16(16);
+ inc_rfc1001_len(work->response_buf, 16);
+out_err1:
rsp->Capabilities = 0;
rsp->Reserved = 0;
/* default manual caching */
rsp->ShareFlags = SMB2_SHAREFLAG_MANUAL_CACHING;
- inc_rfc1001_len(work->response_buf, 16);
if (!IS_ERR(treename))
kfree(treename);
@@ -1967,6 +1968,9 @@ out_err1:
rsp->hdr.Status = STATUS_ACCESS_DENIED;
}
+ if (status.ret != KSMBD_TREE_CONN_STATUS_OK)
+ smb2_set_err_rsp(work);
+
return rc;
}
@@ -2189,7 +2193,7 @@ out:
static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len,
const struct path *path)
{
- struct user_namespace *user_ns = mnt_user_ns(path->mnt);
+ struct mnt_idmap *idmap = mnt_idmap(path->mnt);
char *attr_name = NULL, *value;
int rc = 0;
unsigned int next = 0;
@@ -2225,7 +2229,7 @@ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len,
value = (char *)&eabuf->name + eabuf->EaNameLength + 1;
if (!eabuf->EaValueLength) {
- rc = ksmbd_vfs_casexattr_len(user_ns,
+ rc = ksmbd_vfs_casexattr_len(idmap,
path->dentry,
attr_name,
XATTR_USER_PREFIX_LEN +
@@ -2233,7 +2237,7 @@ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len,
/* delete the EA only when it exits */
if (rc > 0) {
- rc = ksmbd_vfs_remove_xattr(user_ns,
+ rc = ksmbd_vfs_remove_xattr(idmap,
path->dentry,
attr_name);
@@ -2248,7 +2252,7 @@ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len,
/* if the EA doesn't exist, just do nothing. */
rc = 0;
} else {
- rc = ksmbd_vfs_setxattr(user_ns,
+ rc = ksmbd_vfs_setxattr(idmap,
path->dentry, attr_name, value,
le16_to_cpu(eabuf->EaValueLength), 0);
if (rc < 0) {
@@ -2278,7 +2282,7 @@ static noinline int smb2_set_stream_name_xattr(const struct path *path,
struct ksmbd_file *fp,
char *stream_name, int s_type)
{
- struct user_namespace *user_ns = mnt_user_ns(path->mnt);
+ struct mnt_idmap *idmap = mnt_idmap(path->mnt);
size_t xattr_stream_size;
char *xattr_stream_name;
int rc;
@@ -2294,7 +2298,7 @@ static noinline int smb2_set_stream_name_xattr(const struct path *path,
fp->stream.size = xattr_stream_size;
/* Check if there is stream prefix in xattr space */
- rc = ksmbd_vfs_casexattr_len(user_ns,
+ rc = ksmbd_vfs_casexattr_len(idmap,
path->dentry,
xattr_stream_name,
xattr_stream_size);
@@ -2306,7 +2310,7 @@ static noinline int smb2_set_stream_name_xattr(const struct path *path,
return -EBADF;
}
- rc = ksmbd_vfs_setxattr(user_ns, path->dentry,
+ rc = ksmbd_vfs_setxattr(idmap, path->dentry,
xattr_stream_name, NULL, 0, 0);
if (rc < 0)
pr_err("Failed to store XATTR stream name :%d\n", rc);
@@ -2315,7 +2319,7 @@ static noinline int smb2_set_stream_name_xattr(const struct path *path,
static int smb2_remove_smb_xattrs(const struct path *path)
{
- struct user_namespace *user_ns = mnt_user_ns(path->mnt);
+ struct mnt_idmap *idmap = mnt_idmap(path->mnt);
char *name, *xattr_list = NULL;
ssize_t xattr_list_len;
int err = 0;
@@ -2335,7 +2339,7 @@ static int smb2_remove_smb_xattrs(const struct path *path)
if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) &&
!strncmp(&name[XATTR_USER_PREFIX_LEN], STREAM_PREFIX,
STREAM_PREFIX_LEN)) {
- err = ksmbd_vfs_remove_xattr(user_ns, path->dentry,
+ err = ksmbd_vfs_remove_xattr(idmap, path->dentry,
name);
if (err)
ksmbd_debug(SMB, "remove xattr failed : %s\n",
@@ -2382,7 +2386,7 @@ static void smb2_new_xattrs(struct ksmbd_tree_connect *tcon, const struct path *
da.flags = XATTR_DOSINFO_ATTRIB | XATTR_DOSINFO_CREATE_TIME |
XATTR_DOSINFO_ITIME;
- rc = ksmbd_vfs_set_dos_attrib_xattr(mnt_user_ns(path->mnt),
+ rc = ksmbd_vfs_set_dos_attrib_xattr(mnt_idmap(path->mnt),
path->dentry, &da);
if (rc)
ksmbd_debug(SMB, "failed to store file attribute into xattr\n");
@@ -2401,7 +2405,7 @@ static void smb2_update_xattrs(struct ksmbd_tree_connect *tcon,
KSMBD_SHARE_FLAG_STORE_DOS_ATTRS))
return;
- rc = ksmbd_vfs_get_dos_attrib_xattr(mnt_user_ns(path->mnt),
+ rc = ksmbd_vfs_get_dos_attrib_xattr(mnt_idmap(path->mnt),
path->dentry, &da);
if (rc > 0) {
fp->f_ci->m_fattr = cpu_to_le32(da.attr);
@@ -2476,11 +2480,11 @@ static int smb2_create_sd_buffer(struct ksmbd_work *work,
}
static void ksmbd_acls_fattr(struct smb_fattr *fattr,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct inode *inode)
{
- vfsuid_t vfsuid = i_uid_into_vfsuid(mnt_userns, inode);
- vfsgid_t vfsgid = i_gid_into_vfsgid(mnt_userns, inode);
+ vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode);
+ vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode);
fattr->cf_uid = vfsuid_into_kuid(vfsuid);
fattr->cf_gid = vfsgid_into_kgid(vfsgid);
@@ -2512,7 +2516,7 @@ int smb2_open(struct ksmbd_work *work)
struct ksmbd_share_config *share = tcon->share_conf;
struct ksmbd_file *fp = NULL;
struct file *filp = NULL;
- struct user_namespace *user_ns = NULL;
+ struct mnt_idmap *idmap = NULL;
struct kstat stat;
struct create_context *context;
struct lease_ctx_info *lc = NULL;
@@ -2765,7 +2769,7 @@ int smb2_open(struct ksmbd_work *work)
rc = 0;
} else {
file_present = true;
- user_ns = mnt_user_ns(path.mnt);
+ idmap = mnt_idmap(path.mnt);
}
if (stream_name) {
if (req->CreateOptions & FILE_DIRECTORY_FILE_LE) {
@@ -2828,7 +2832,7 @@ int smb2_open(struct ksmbd_work *work)
if (!file_present) {
daccess = cpu_to_le32(GENERIC_ALL_FLAGS);
} else {
- rc = ksmbd_vfs_query_maximal_access(user_ns,
+ rc = ksmbd_vfs_query_maximal_access(idmap,
path.dentry,
&daccess);
if (rc)
@@ -2864,7 +2868,7 @@ int smb2_open(struct ksmbd_work *work)
}
created = true;
- user_ns = mnt_user_ns(path.mnt);
+ idmap = mnt_idmap(path.mnt);
if (ea_buf) {
if (le32_to_cpu(ea_buf->ccontext.DataLength) <
sizeof(struct smb2_ea_info)) {
@@ -2886,7 +2890,7 @@ int smb2_open(struct ksmbd_work *work)
* is already granted.
*/
if (daccess & ~(FILE_READ_ATTRIBUTES_LE | FILE_READ_CONTROL_LE)) {
- rc = inode_permission(user_ns,
+ rc = inode_permission(idmap,
d_inode(path.dentry),
may_flags);
if (rc)
@@ -2894,7 +2898,7 @@ int smb2_open(struct ksmbd_work *work)
if ((daccess & FILE_DELETE_LE) ||
(req->CreateOptions & FILE_DELETE_ON_CLOSE_LE)) {
- rc = ksmbd_vfs_may_delete(user_ns,
+ rc = ksmbd_vfs_may_delete(idmap,
path.dentry);
if (rc)
goto err_out;
@@ -2957,7 +2961,7 @@ int smb2_open(struct ksmbd_work *work)
int posix_acl_rc;
struct inode *inode = d_inode(path.dentry);
- posix_acl_rc = ksmbd_vfs_inherit_posix_acl(user_ns,
+ posix_acl_rc = ksmbd_vfs_inherit_posix_acl(idmap,
path.dentry,
d_inode(path.dentry->d_parent));
if (posix_acl_rc)
@@ -2973,7 +2977,7 @@ int smb2_open(struct ksmbd_work *work)
rc = smb2_create_sd_buffer(work, req, &path);
if (rc) {
if (posix_acl_rc)
- ksmbd_vfs_set_init_posix_acl(user_ns,
+ ksmbd_vfs_set_init_posix_acl(idmap,
path.dentry);
if (test_share_config_flag(work->tcon->share_conf,
@@ -2982,7 +2986,7 @@ int smb2_open(struct ksmbd_work *work)
struct smb_ntsd *pntsd;
int pntsd_size, ace_num = 0;
- ksmbd_acls_fattr(&fattr, user_ns, inode);
+ ksmbd_acls_fattr(&fattr, idmap, inode);
if (fattr.cf_acls)
ace_num = fattr.cf_acls->a_count;
if (fattr.cf_dacls)
@@ -2996,7 +3000,7 @@ int smb2_open(struct ksmbd_work *work)
if (!pntsd)
goto err_out;
- rc = build_sec_desc(user_ns,
+ rc = build_sec_desc(idmap,
pntsd, NULL, 0,
OWNER_SECINFO |
GROUP_SECINFO |
@@ -3010,7 +3014,7 @@ int smb2_open(struct ksmbd_work *work)
}
rc = ksmbd_vfs_set_sd_xattr(conn,
- user_ns,
+ idmap,
path.dentry,
pntsd,
pntsd_size);
@@ -3206,7 +3210,7 @@ int smb2_open(struct ksmbd_work *work)
struct create_context *mxac_ccontext;
if (maximal_access == 0)
- ksmbd_vfs_query_maximal_access(user_ns,
+ ksmbd_vfs_query_maximal_access(idmap,
path.dentry,
&maximal_access);
mxac_ccontext = (struct create_context *)(rsp->Buffer +
@@ -3631,7 +3635,7 @@ static void unlock_dir(struct ksmbd_file *dir_fp)
static int process_query_dir_entries(struct smb2_query_dir_private *priv)
{
- struct user_namespace *user_ns = file_mnt_user_ns(priv->dir_fp->filp);
+ struct mnt_idmap *idmap = file_mnt_idmap(priv->dir_fp->filp);
struct kstat kstat;
struct ksmbd_kstat ksmbd_kstat;
int rc;
@@ -3644,7 +3648,7 @@ static int process_query_dir_entries(struct smb2_query_dir_private *priv)
return -EINVAL;
lock_dir(priv->dir_fp);
- dent = lookup_one(user_ns, priv->d_info->name,
+ dent = lookup_one(idmap, priv->d_info->name,
priv->dir_fp->filp->f_path.dentry,
priv->d_info->name_len);
unlock_dir(priv->dir_fp);
@@ -3665,7 +3669,7 @@ static int process_query_dir_entries(struct smb2_query_dir_private *priv)
ksmbd_kstat.kstat = &kstat;
if (priv->info_level != FILE_NAMES_INFORMATION)
ksmbd_vfs_fill_dentry_attrs(priv->work,
- user_ns,
+ idmap,
dent,
&ksmbd_kstat);
@@ -3895,7 +3899,7 @@ int smb2_query_dir(struct ksmbd_work *work)
}
if (!(dir_fp->daccess & FILE_LIST_DIRECTORY_LE) ||
- inode_permission(file_mnt_user_ns(dir_fp->filp),
+ inode_permission(file_mnt_idmap(dir_fp->filp),
file_inode(dir_fp->filp),
MAY_READ | MAY_EXEC)) {
pr_err("no right to enumerate directory (%pD)\n", dir_fp->filp);
@@ -4161,7 +4165,7 @@ static int smb2_get_ea(struct ksmbd_work *work, struct ksmbd_file *fp,
ssize_t buf_free_len, alignment_bytes, next_offset, rsp_data_cnt = 0;
struct smb2_ea_info_req *ea_req = NULL;
const struct path *path;
- struct user_namespace *user_ns = file_mnt_user_ns(fp->filp);
+ struct mnt_idmap *idmap = file_mnt_idmap(fp->filp);
if (!(fp->daccess & FILE_READ_EA_LE)) {
pr_err("Not permitted to read ext attr : 0x%x\n",
@@ -4241,7 +4245,7 @@ static int smb2_get_ea(struct ksmbd_work *work, struct ksmbd_file *fp,
buf_free_len -= (offsetof(struct smb2_ea_info, name) +
name_len + 1);
/* bailout if xattr can't fit in buf_free_len */
- value_len = ksmbd_vfs_getxattr(user_ns, path->dentry,
+ value_len = ksmbd_vfs_getxattr(idmap, path->dentry,
name, &buf);
if (value_len <= 0) {
rc = -ENOENT;
@@ -4331,7 +4335,7 @@ static int get_file_basic_info(struct smb2_query_info_rsp *rsp,
}
basic_info = (struct smb2_file_basic_info *)rsp->Buffer;
- generic_fillattr(file_mnt_user_ns(fp->filp), file_inode(fp->filp),
+ generic_fillattr(file_mnt_idmap(fp->filp), file_inode(fp->filp),
&stat);
basic_info->CreationTime = cpu_to_le64(fp->create_time);
time = ksmbd_UnixTimeToNT(stat.atime);
@@ -4372,7 +4376,7 @@ static void get_file_standard_info(struct smb2_query_info_rsp *rsp,
struct kstat stat;
inode = file_inode(fp->filp);
- generic_fillattr(file_mnt_user_ns(fp->filp), inode, &stat);
+ generic_fillattr(file_mnt_idmap(fp->filp), inode, &stat);
sinfo = (struct smb2_file_standard_info *)rsp->Buffer;
delete_pending = ksmbd_inode_pending_delete(fp);
@@ -4426,7 +4430,7 @@ static int get_file_all_info(struct ksmbd_work *work,
return PTR_ERR(filename);
inode = file_inode(fp->filp);
- generic_fillattr(file_mnt_user_ns(fp->filp), inode, &stat);
+ generic_fillattr(file_mnt_idmap(fp->filp), inode, &stat);
ksmbd_debug(SMB, "filename = %s\n", filename);
delete_pending = ksmbd_inode_pending_delete(fp);
@@ -4503,7 +4507,7 @@ static void get_file_stream_info(struct ksmbd_work *work,
int buf_free_len;
struct smb2_query_info_req *req = ksmbd_req_buf_next(work);
- generic_fillattr(file_mnt_user_ns(fp->filp), file_inode(fp->filp),
+ generic_fillattr(file_mnt_idmap(fp->filp), file_inode(fp->filp),
&stat);
file_info = (struct smb2_file_stream_info *)rsp->Buffer;
@@ -4594,7 +4598,7 @@ static void get_file_internal_info(struct smb2_query_info_rsp *rsp,
struct smb2_file_internal_info *file_info;
struct kstat stat;
- generic_fillattr(file_mnt_user_ns(fp->filp), file_inode(fp->filp),
+ generic_fillattr(file_mnt_idmap(fp->filp), file_inode(fp->filp),
&stat);
file_info = (struct smb2_file_internal_info *)rsp->Buffer;
file_info->IndexNumber = cpu_to_le64(stat.ino);
@@ -4620,7 +4624,7 @@ static int get_file_network_open_info(struct smb2_query_info_rsp *rsp,
file_info = (struct smb2_file_ntwrk_info *)rsp->Buffer;
inode = file_inode(fp->filp);
- generic_fillattr(file_mnt_user_ns(fp->filp), inode, &stat);
+ generic_fillattr(file_mnt_idmap(fp->filp), inode, &stat);
file_info->CreationTime = cpu_to_le64(fp->create_time);
time = ksmbd_UnixTimeToNT(stat.atime);
@@ -4681,7 +4685,7 @@ static void get_file_compression_info(struct smb2_query_info_rsp *rsp,
struct smb2_file_comp_info *file_info;
struct kstat stat;
- generic_fillattr(file_mnt_user_ns(fp->filp), file_inode(fp->filp),
+ generic_fillattr(file_mnt_idmap(fp->filp), file_inode(fp->filp),
&stat);
file_info = (struct smb2_file_comp_info *)rsp->Buffer;
@@ -4722,9 +4726,9 @@ static int find_file_posix_info(struct smb2_query_info_rsp *rsp,
{
struct smb311_posix_qinfo *file_info;
struct inode *inode = file_inode(fp->filp);
- struct user_namespace *user_ns = file_mnt_user_ns(fp->filp);
- vfsuid_t vfsuid = i_uid_into_vfsuid(user_ns, inode);
- vfsgid_t vfsgid = i_gid_into_vfsgid(user_ns, inode);
+ struct mnt_idmap *idmap = file_mnt_idmap(fp->filp);
+ vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode);
+ vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode);
u64 time;
int out_buf_len = sizeof(struct smb311_posix_qinfo) + 32;
@@ -5124,7 +5128,7 @@ static int smb2_get_info_sec(struct ksmbd_work *work,
struct smb2_query_info_rsp *rsp)
{
struct ksmbd_file *fp;
- struct user_namespace *user_ns;
+ struct mnt_idmap *idmap;
struct smb_ntsd *pntsd = (struct smb_ntsd *)rsp->Buffer, *ppntsd = NULL;
struct smb_fattr fattr = {{0}};
struct inode *inode;
@@ -5171,19 +5175,19 @@ static int smb2_get_info_sec(struct ksmbd_work *work,
if (!fp)
return -ENOENT;
- user_ns = file_mnt_user_ns(fp->filp);
+ idmap = file_mnt_idmap(fp->filp);
inode = file_inode(fp->filp);
- ksmbd_acls_fattr(&fattr, user_ns, inode);
+ ksmbd_acls_fattr(&fattr, idmap, inode);
if (test_share_config_flag(work->tcon->share_conf,
KSMBD_SHARE_FLAG_ACL_XATTR))
- ppntsd_size = ksmbd_vfs_get_sd_xattr(work->conn, user_ns,
+ ppntsd_size = ksmbd_vfs_get_sd_xattr(work->conn, idmap,
fp->filp->f_path.dentry,
&ppntsd);
/* Check if sd buffer size exceeds response buffer size */
if (smb2_resp_buf_len(work, 8) > ppntsd_size)
- rc = build_sec_desc(user_ns, pntsd, ppntsd, ppntsd_size,
+ rc = build_sec_desc(idmap, pntsd, ppntsd, ppntsd_size,
addition_info, &secdesclen, &fattr);
posix_acl_release(fattr.cf_acls);
posix_acl_release(fattr.cf_dacls);
@@ -5413,7 +5417,7 @@ int smb2_echo(struct ksmbd_work *work)
static int smb2_rename(struct ksmbd_work *work,
struct ksmbd_file *fp,
- struct user_namespace *user_ns,
+ struct mnt_idmap *idmap,
struct smb2_file_rename_info *file_info,
struct nls_table *local_nls)
{
@@ -5476,7 +5480,7 @@ static int smb2_rename(struct ksmbd_work *work,
if (rc)
goto out;
- rc = ksmbd_vfs_setxattr(user_ns,
+ rc = ksmbd_vfs_setxattr(idmap,
fp->filp->f_path.dentry,
xattr_stream_name,
NULL, 0, 0);
@@ -5615,7 +5619,7 @@ static int set_file_basic_info(struct ksmbd_file *fp,
struct iattr attrs;
struct file *filp;
struct inode *inode;
- struct user_namespace *user_ns;
+ struct mnt_idmap *idmap;
int rc = 0;
if (!(fp->daccess & FILE_WRITE_ATTRIBUTES_LE))
@@ -5624,7 +5628,7 @@ static int set_file_basic_info(struct ksmbd_file *fp,
attrs.ia_valid = 0;
filp = fp->filp;
inode = file_inode(filp);
- user_ns = file_mnt_user_ns(filp);
+ idmap = file_mnt_idmap(filp);
if (file_info->CreationTime)
fp->create_time = le64_to_cpu(file_info->CreationTime);
@@ -5668,7 +5672,7 @@ static int set_file_basic_info(struct ksmbd_file *fp,
da.flags = XATTR_DOSINFO_ATTRIB | XATTR_DOSINFO_CREATE_TIME |
XATTR_DOSINFO_ITIME;
- rc = ksmbd_vfs_set_dos_attrib_xattr(user_ns,
+ rc = ksmbd_vfs_set_dos_attrib_xattr(idmap,
filp->f_path.dentry, &da);
if (rc)
ksmbd_debug(SMB,
@@ -5686,7 +5690,7 @@ static int set_file_basic_info(struct ksmbd_file *fp,
inode_lock(inode);
inode->i_ctime = attrs.ia_ctime;
attrs.ia_valid &= ~ATTR_CTIME;
- rc = notify_change(user_ns, dentry, &attrs, NULL);
+ rc = notify_change(idmap, dentry, &attrs, NULL);
inode_unlock(inode);
}
return rc;
@@ -5779,7 +5783,7 @@ static int set_rename_info(struct ksmbd_work *work, struct ksmbd_file *fp,
struct smb2_file_rename_info *rename_info,
unsigned int buf_len)
{
- struct user_namespace *user_ns;
+ struct mnt_idmap *idmap;
struct ksmbd_file *parent_fp;
struct dentry *parent;
struct dentry *dentry = fp->filp->f_path.dentry;
@@ -5794,12 +5798,12 @@ static int set_rename_info(struct ksmbd_work *work, struct ksmbd_file *fp,
le32_to_cpu(rename_info->FileNameLength))
return -EINVAL;
- user_ns = file_mnt_user_ns(fp->filp);
+ idmap = file_mnt_idmap(fp->filp);
if (ksmbd_stream_fd(fp))
goto next;
parent = dget_parent(dentry);
- ret = ksmbd_vfs_lock_parent(user_ns, parent, dentry);
+ ret = ksmbd_vfs_lock_parent(idmap, parent, dentry);
if (ret) {
dput(parent);
return ret;
@@ -5818,7 +5822,7 @@ static int set_rename_info(struct ksmbd_work *work, struct ksmbd_file *fp,
ksmbd_fd_put(work, parent_fp);
}
next:
- return smb2_rename(work, fp, user_ns, rename_info,
+ return smb2_rename(work, fp, idmap, rename_info,
work->conn->local_nls);
}
@@ -7527,14 +7531,14 @@ static inline int fsctl_set_sparse(struct ksmbd_work *work, u64 id,
struct file_sparse *sparse)
{
struct ksmbd_file *fp;
- struct user_namespace *user_ns;
+ struct mnt_idmap *idmap;
int ret = 0;
__le32 old_fattr;
fp = ksmbd_lookup_fd_fast(work, id);
if (!fp)
return -ENOENT;
- user_ns = file_mnt_user_ns(fp->filp);
+ idmap = file_mnt_idmap(fp->filp);
old_fattr = fp->f_ci->m_fattr;
if (sparse->SetSparse)
@@ -7547,13 +7551,13 @@ static inline int fsctl_set_sparse(struct ksmbd_work *work, u64 id,
KSMBD_SHARE_FLAG_STORE_DOS_ATTRS)) {
struct xattr_dos_attrib da;
- ret = ksmbd_vfs_get_dos_attrib_xattr(user_ns,
+ ret = ksmbd_vfs_get_dos_attrib_xattr(idmap,
fp->filp->f_path.dentry, &da);
if (ret <= 0)
goto out;
da.attr = le32_to_cpu(fp->f_ci->m_fattr);
- ret = ksmbd_vfs_set_dos_attrib_xattr(user_ns,
+ ret = ksmbd_vfs_set_dos_attrib_xattr(idmap,
fp->filp->f_path.dentry, &da);
if (ret)
fp->f_ci->m_fattr = old_fattr;
@@ -8660,6 +8664,7 @@ int smb3_decrypt_req(struct ksmbd_work *work)
bool smb3_11_final_sess_setup_resp(struct ksmbd_work *work)
{
struct ksmbd_conn *conn = work->conn;
+ struct ksmbd_session *sess = work->sess;
struct smb2_hdr *rsp = smb2_get_msg(work->response_buf);
if (conn->dialect < SMB30_PROT_ID)
@@ -8669,6 +8674,7 @@ bool smb3_11_final_sess_setup_resp(struct ksmbd_work *work)
rsp = ksmbd_resp_buf_next(work);
if (le16_to_cpu(rsp->Command) == SMB2_SESSION_SETUP_HE &&
+ sess->user && !user_guest(sess->user) &&
rsp->Status == STATUS_SUCCESS)
return true;
return false;
diff --git a/fs/ksmbd/smb2pdu.h b/fs/ksmbd/smb2pdu.h
index aa5dbe54f5a1..0c8a770fe318 100644
--- a/fs/ksmbd/smb2pdu.h
+++ b/fs/ksmbd/smb2pdu.h
@@ -24,8 +24,9 @@
#define SMB21_DEFAULT_IOSIZE (1024 * 1024)
#define SMB3_DEFAULT_TRANS_SIZE (1024 * 1024)
-#define SMB3_MIN_IOSIZE (64 * 1024)
-#define SMB3_MAX_IOSIZE (8 * 1024 * 1024)
+#define SMB3_MIN_IOSIZE (64 * 1024)
+#define SMB3_MAX_IOSIZE (8 * 1024 * 1024)
+#define SMB3_MAX_MSGSIZE (4 * 4096)
/*
* Definitions for SMB2 Protocol Data Units (network frames)
diff --git a/fs/ksmbd/smb_common.c b/fs/ksmbd/smb_common.c
index 2a4fbbd55b91..fa2b54df6ee6 100644
--- a/fs/ksmbd/smb_common.c
+++ b/fs/ksmbd/smb_common.c
@@ -307,7 +307,7 @@ int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level,
{
int i, rc = 0;
struct ksmbd_conn *conn = work->conn;
- struct user_namespace *user_ns = file_mnt_user_ns(dir->filp);
+ struct mnt_idmap *idmap = file_mnt_idmap(dir->filp);
for (i = 0; i < 2; i++) {
struct kstat kstat;
@@ -333,7 +333,7 @@ int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level,
ksmbd_kstat.kstat = &kstat;
ksmbd_vfs_fill_dentry_attrs(work,
- user_ns,
+ idmap,
dentry,
&ksmbd_kstat);
rc = fn(conn, info_level, d_info, &ksmbd_kstat);
diff --git a/fs/ksmbd/smbacl.c b/fs/ksmbd/smbacl.c
index ab5c68cc0e13..6d6cfb6957a9 100644
--- a/fs/ksmbd/smbacl.c
+++ b/fs/ksmbd/smbacl.c
@@ -254,7 +254,7 @@ void id_to_sid(unsigned int cid, uint sidtype, struct smb_sid *ssid)
ssid->num_subauth++;
}
-static int sid_to_id(struct user_namespace *user_ns,
+static int sid_to_id(struct mnt_idmap *idmap,
struct smb_sid *psid, uint sidtype,
struct smb_fattr *fattr)
{
@@ -276,7 +276,7 @@ static int sid_to_id(struct user_namespace *user_ns,
id = le32_to_cpu(psid->sub_auth[psid->num_subauth - 1]);
uid = KUIDT_INIT(id);
- uid = from_vfsuid(user_ns, &init_user_ns, VFSUIDT_INIT(uid));
+ uid = from_vfsuid(idmap, &init_user_ns, VFSUIDT_INIT(uid));
if (uid_valid(uid)) {
fattr->cf_uid = uid;
rc = 0;
@@ -287,7 +287,7 @@ static int sid_to_id(struct user_namespace *user_ns,
id = le32_to_cpu(psid->sub_auth[psid->num_subauth - 1]);
gid = KGIDT_INIT(id);
- gid = from_vfsgid(user_ns, &init_user_ns, VFSGIDT_INIT(gid));
+ gid = from_vfsgid(idmap, &init_user_ns, VFSGIDT_INIT(gid));
if (gid_valid(gid)) {
fattr->cf_gid = gid;
rc = 0;
@@ -362,7 +362,7 @@ void free_acl_state(struct posix_acl_state *state)
kfree(state->groups);
}
-static void parse_dacl(struct user_namespace *user_ns,
+static void parse_dacl(struct mnt_idmap *idmap,
struct smb_acl *pdacl, char *end_of_acl,
struct smb_sid *pownersid, struct smb_sid *pgrpsid,
struct smb_fattr *fattr)
@@ -489,7 +489,7 @@ static void parse_dacl(struct user_namespace *user_ns,
acl_mode = access_flags_to_mode(fattr, ppace[i]->access_req,
ppace[i]->type);
temp_fattr.cf_uid = INVALID_UID;
- ret = sid_to_id(user_ns, &ppace[i]->sid, SIDOWNER, &temp_fattr);
+ ret = sid_to_id(idmap, &ppace[i]->sid, SIDOWNER, &temp_fattr);
if (ret || uid_eq(temp_fattr.cf_uid, INVALID_UID)) {
pr_err("%s: Error %d mapping Owner SID to uid\n",
__func__, ret);
@@ -575,7 +575,7 @@ static void parse_dacl(struct user_namespace *user_ns,
free_acl_state(&default_acl_state);
}
-static void set_posix_acl_entries_dacl(struct user_namespace *user_ns,
+static void set_posix_acl_entries_dacl(struct mnt_idmap *idmap,
struct smb_ace *pndace,
struct smb_fattr *fattr, u32 *num_aces,
u16 *size, u32 nt_aces_num)
@@ -600,14 +600,14 @@ static void set_posix_acl_entries_dacl(struct user_namespace *user_ns,
uid_t uid;
unsigned int sid_type = SIDOWNER;
- uid = posix_acl_uid_translate(user_ns, pace);
+ uid = posix_acl_uid_translate(idmap, pace);
if (!uid)
sid_type = SIDUNIX_USER;
id_to_sid(uid, sid_type, sid);
} else if (pace->e_tag == ACL_GROUP) {
gid_t gid;
- gid = posix_acl_gid_translate(user_ns, pace);
+ gid = posix_acl_gid_translate(idmap, pace);
id_to_sid(gid, SIDUNIX_GROUP, sid);
} else if (pace->e_tag == ACL_OTHER && !nt_aces_num) {
smb_copy_sid(sid, &sid_everyone);
@@ -666,12 +666,12 @@ posix_default_acl:
if (pace->e_tag == ACL_USER) {
uid_t uid;
- uid = posix_acl_uid_translate(user_ns, pace);
+ uid = posix_acl_uid_translate(idmap, pace);
id_to_sid(uid, SIDCREATOR_OWNER, sid);
} else if (pace->e_tag == ACL_GROUP) {
gid_t gid;
- gid = posix_acl_gid_translate(user_ns, pace);
+ gid = posix_acl_gid_translate(idmap, pace);
id_to_sid(gid, SIDCREATOR_GROUP, sid);
} else {
kfree(sid);
@@ -689,7 +689,7 @@ posix_default_acl:
}
}
-static void set_ntacl_dacl(struct user_namespace *user_ns,
+static void set_ntacl_dacl(struct mnt_idmap *idmap,
struct smb_acl *pndacl,
struct smb_acl *nt_dacl,
unsigned int aces_size,
@@ -723,13 +723,13 @@ static void set_ntacl_dacl(struct user_namespace *user_ns,
}
}
- set_posix_acl_entries_dacl(user_ns, pndace, fattr,
+ set_posix_acl_entries_dacl(idmap, pndace, fattr,
&num_aces, &size, nt_num_aces);
pndacl->num_aces = cpu_to_le32(num_aces);
pndacl->size = cpu_to_le16(le16_to_cpu(pndacl->size) + size);
}
-static void set_mode_dacl(struct user_namespace *user_ns,
+static void set_mode_dacl(struct mnt_idmap *idmap,
struct smb_acl *pndacl, struct smb_fattr *fattr)
{
struct smb_ace *pace, *pndace;
@@ -741,7 +741,7 @@ static void set_mode_dacl(struct user_namespace *user_ns,
pace = pndace = (struct smb_ace *)((char *)pndacl + sizeof(struct smb_acl));
if (fattr->cf_acls) {
- set_posix_acl_entries_dacl(user_ns, pndace, fattr,
+ set_posix_acl_entries_dacl(idmap, pndace, fattr,
&num_aces, &size, num_aces);
goto out;
}
@@ -808,7 +808,7 @@ static int parse_sid(struct smb_sid *psid, char *end_of_acl)
}
/* Convert CIFS ACL to POSIX form */
-int parse_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd,
+int parse_sec_desc(struct mnt_idmap *idmap, struct smb_ntsd *pntsd,
int acl_len, struct smb_fattr *fattr)
{
int rc = 0;
@@ -851,7 +851,7 @@ int parse_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd,
return rc;
}
- rc = sid_to_id(user_ns, owner_sid_ptr, SIDOWNER, fattr);
+ rc = sid_to_id(idmap, owner_sid_ptr, SIDOWNER, fattr);
if (rc) {
pr_err("%s: Error %d mapping Owner SID to uid\n",
__func__, rc);
@@ -866,7 +866,7 @@ int parse_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd,
__func__, rc);
return rc;
}
- rc = sid_to_id(user_ns, group_sid_ptr, SIDUNIX_GROUP, fattr);
+ rc = sid_to_id(idmap, group_sid_ptr, SIDUNIX_GROUP, fattr);
if (rc) {
pr_err("%s: Error %d mapping Group SID to gid\n",
__func__, rc);
@@ -881,7 +881,7 @@ int parse_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd,
pntsd->type |= cpu_to_le16(DACL_PROTECTED);
if (dacloffset) {
- parse_dacl(user_ns, dacl_ptr, end_of_acl,
+ parse_dacl(idmap, dacl_ptr, end_of_acl,
owner_sid_ptr, group_sid_ptr, fattr);
}
@@ -889,7 +889,7 @@ int parse_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd,
}
/* Convert permission bits from mode to equivalent CIFS ACL */
-int build_sec_desc(struct user_namespace *user_ns,
+int build_sec_desc(struct mnt_idmap *idmap,
struct smb_ntsd *pntsd, struct smb_ntsd *ppntsd,
int ppntsd_size, int addition_info, __u32 *secdesclen,
struct smb_fattr *fattr)
@@ -950,7 +950,7 @@ int build_sec_desc(struct user_namespace *user_ns,
dacl_ptr->num_aces = 0;
if (!ppntsd) {
- set_mode_dacl(user_ns, dacl_ptr, fattr);
+ set_mode_dacl(idmap, dacl_ptr, fattr);
} else {
struct smb_acl *ppdacl_ptr;
unsigned int dacl_offset = le32_to_cpu(ppntsd->dacloffset);
@@ -966,7 +966,7 @@ int build_sec_desc(struct user_namespace *user_ns,
ppdacl_size < sizeof(struct smb_acl))
goto out;
- set_ntacl_dacl(user_ns, dacl_ptr, ppdacl_ptr,
+ set_ntacl_dacl(idmap, dacl_ptr, ppdacl_ptr,
ntacl_size - sizeof(struct smb_acl),
nowner_sid_ptr, ngroup_sid_ptr,
fattr);
@@ -1002,13 +1002,13 @@ int smb_inherit_dacl(struct ksmbd_conn *conn,
struct smb_ntsd *parent_pntsd = NULL;
struct smb_sid owner_sid, group_sid;
struct dentry *parent = path->dentry->d_parent;
- struct user_namespace *user_ns = mnt_user_ns(path->mnt);
+ struct mnt_idmap *idmap = mnt_idmap(path->mnt);
int inherited_flags = 0, flags = 0, i, ace_cnt = 0, nt_size = 0, pdacl_size;
int rc = 0, num_aces, dacloffset, pntsd_type, pntsd_size, acl_len, aces_size;
char *aces_base;
bool is_dir = S_ISDIR(d_inode(path->dentry)->i_mode);
- pntsd_size = ksmbd_vfs_get_sd_xattr(conn, user_ns,
+ pntsd_size = ksmbd_vfs_get_sd_xattr(conn, idmap,
parent, &parent_pntsd);
if (pntsd_size <= 0)
return -ENOENT;
@@ -1162,7 +1162,7 @@ pass:
pntsd_size += sizeof(struct smb_acl) + nt_size;
}
- ksmbd_vfs_set_sd_xattr(conn, user_ns,
+ ksmbd_vfs_set_sd_xattr(conn, idmap,
path->dentry, pntsd, pntsd_size);
kfree(pntsd);
}
@@ -1190,7 +1190,7 @@ bool smb_inherit_flags(int flags, bool is_dir)
int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path,
__le32 *pdaccess, int uid)
{
- struct user_namespace *user_ns = mnt_user_ns(path->mnt);
+ struct mnt_idmap *idmap = mnt_idmap(path->mnt);
struct smb_ntsd *pntsd = NULL;
struct smb_acl *pdacl;
struct posix_acl *posix_acls;
@@ -1206,7 +1206,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path,
unsigned short ace_size;
ksmbd_debug(SMB, "check permission using windows acl\n");
- pntsd_size = ksmbd_vfs_get_sd_xattr(conn, user_ns,
+ pntsd_size = ksmbd_vfs_get_sd_xattr(conn, idmap,
path->dentry, &pntsd);
if (pntsd_size <= 0 || !pntsd)
goto err_out;
@@ -1296,9 +1296,9 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path,
pa_entry = posix_acls->a_entries;
for (i = 0; i < posix_acls->a_count; i++, pa_entry++) {
if (pa_entry->e_tag == ACL_USER)
- id = posix_acl_uid_translate(user_ns, pa_entry);
+ id = posix_acl_uid_translate(idmap, pa_entry);
else if (pa_entry->e_tag == ACL_GROUP)
- id = posix_acl_gid_translate(user_ns, pa_entry);
+ id = posix_acl_gid_translate(idmap, pa_entry);
else
continue;
@@ -1360,14 +1360,14 @@ int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon,
int rc;
struct smb_fattr fattr = {{0}};
struct inode *inode = d_inode(path->dentry);
- struct user_namespace *user_ns = mnt_user_ns(path->mnt);
+ struct mnt_idmap *idmap = mnt_idmap(path->mnt);
struct iattr newattrs;
fattr.cf_uid = INVALID_UID;
fattr.cf_gid = INVALID_GID;
fattr.cf_mode = inode->i_mode;
- rc = parse_sec_desc(user_ns, pntsd, ntsd_len, &fattr);
+ rc = parse_sec_desc(idmap, pntsd, ntsd_len, &fattr);
if (rc)
goto out;
@@ -1383,17 +1383,17 @@ int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon,
newattrs.ia_valid |= ATTR_MODE;
newattrs.ia_mode = (inode->i_mode & ~0777) | (fattr.cf_mode & 0777);
- ksmbd_vfs_remove_acl_xattrs(user_ns, path->dentry);
+ ksmbd_vfs_remove_acl_xattrs(idmap, path->dentry);
/* Update posix acls */
if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && fattr.cf_dacls) {
- rc = set_posix_acl(user_ns, path->dentry,
+ rc = set_posix_acl(idmap, path->dentry,
ACL_TYPE_ACCESS, fattr.cf_acls);
if (rc < 0)
ksmbd_debug(SMB,
"Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n",
rc);
if (S_ISDIR(inode->i_mode) && fattr.cf_dacls) {
- rc = set_posix_acl(user_ns, path->dentry,
+ rc = set_posix_acl(idmap, path->dentry,
ACL_TYPE_DEFAULT, fattr.cf_dacls);
if (rc)
ksmbd_debug(SMB,
@@ -1403,7 +1403,7 @@ int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon,
}
inode_lock(inode);
- rc = notify_change(user_ns, path->dentry, &newattrs, NULL);
+ rc = notify_change(idmap, path->dentry, &newattrs, NULL);
inode_unlock(inode);
if (rc)
goto out;
@@ -1414,8 +1414,8 @@ int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon,
if (test_share_config_flag(tcon->share_conf, KSMBD_SHARE_FLAG_ACL_XATTR)) {
/* Update WinACL in xattr */
- ksmbd_vfs_remove_sd_xattrs(user_ns, path->dentry);
- ksmbd_vfs_set_sd_xattr(conn, user_ns,
+ ksmbd_vfs_remove_sd_xattrs(idmap, path->dentry);
+ ksmbd_vfs_set_sd_xattr(conn, idmap,
path->dentry, pntsd, ntsd_len);
}
diff --git a/fs/ksmbd/smbacl.h b/fs/ksmbd/smbacl.h
index 618f2e0236b3..49a8c292bd2e 100644
--- a/fs/ksmbd/smbacl.h
+++ b/fs/ksmbd/smbacl.h
@@ -190,9 +190,9 @@ struct posix_acl_state {
struct posix_ace_state_array *groups;
};
-int parse_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd,
+int parse_sec_desc(struct mnt_idmap *idmap, struct smb_ntsd *pntsd,
int acl_len, struct smb_fattr *fattr);
-int build_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd,
+int build_sec_desc(struct mnt_idmap *idmap, struct smb_ntsd *pntsd,
struct smb_ntsd *ppntsd, int ppntsd_size, int addition_info,
__u32 *secdesclen, struct smb_fattr *fattr);
int init_acl_state(struct posix_acl_state *state, int cnt);
@@ -211,25 +211,25 @@ int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon,
void id_to_sid(unsigned int cid, uint sidtype, struct smb_sid *ssid);
void ksmbd_init_domain(u32 *sub_auth);
-static inline uid_t posix_acl_uid_translate(struct user_namespace *mnt_userns,
+static inline uid_t posix_acl_uid_translate(struct mnt_idmap *idmap,
struct posix_acl_entry *pace)
{
vfsuid_t vfsuid;
/* If this is an idmapped mount, apply the idmapping. */
- vfsuid = make_vfsuid(mnt_userns, &init_user_ns, pace->e_uid);
+ vfsuid = make_vfsuid(idmap, &init_user_ns, pace->e_uid);
/* Translate the kuid into a userspace id ksmbd would see. */
return from_kuid(&init_user_ns, vfsuid_into_kuid(vfsuid));
}
-static inline gid_t posix_acl_gid_translate(struct user_namespace *mnt_userns,
+static inline gid_t posix_acl_gid_translate(struct mnt_idmap *idmap,
struct posix_acl_entry *pace)
{
vfsgid_t vfsgid;
/* If this is an idmapped mount, apply the idmapping. */
- vfsgid = make_vfsgid(mnt_userns, &init_user_ns, pace->e_gid);
+ vfsgid = make_vfsgid(idmap, &init_user_ns, pace->e_gid);
/* Translate the kgid into a userspace id ksmbd would see. */
return from_kgid(&init_user_ns, vfsgid_into_kgid(vfsgid));
diff --git a/fs/ksmbd/transport_ipc.c b/fs/ksmbd/transport_ipc.c
index c9aca21637d5..40c721f9227e 100644
--- a/fs/ksmbd/transport_ipc.c
+++ b/fs/ksmbd/transport_ipc.c
@@ -308,6 +308,9 @@ static int ipc_server_config_on_startup(struct ksmbd_startup_request *req)
if (req->smbd_max_io_size)
init_smbd_max_io_size(req->smbd_max_io_size);
+ if (req->max_connections)
+ server_conf.max_connections = req->max_connections;
+
ret = ksmbd_set_netbios_name(req->netbios_name);
ret |= ksmbd_set_server_string(req->server_string);
ret |= ksmbd_set_work_group(req->work_group);
diff --git a/fs/ksmbd/transport_tcp.c b/fs/ksmbd/transport_tcp.c
index 63d55f543bd2..603893fd87f5 100644
--- a/fs/ksmbd/transport_tcp.c
+++ b/fs/ksmbd/transport_tcp.c
@@ -15,6 +15,8 @@
#define IFACE_STATE_DOWN BIT(0)
#define IFACE_STATE_CONFIGURED BIT(1)
+static atomic_t active_num_conn;
+
struct interface {
struct task_struct *ksmbd_kthread;
struct socket *ksmbd_socket;
@@ -185,8 +187,10 @@ static int ksmbd_tcp_new_connection(struct socket *client_sk)
struct tcp_transport *t;
t = alloc_transport(client_sk);
- if (!t)
+ if (!t) {
+ sock_release(client_sk);
return -ENOMEM;
+ }
csin = KSMBD_TCP_PEER_SOCKADDR(KSMBD_TRANS(t)->conn);
if (kernel_getpeername(client_sk, csin) < 0) {
@@ -239,6 +243,15 @@ static int ksmbd_kthread_fn(void *p)
continue;
}
+ if (server_conf.max_connections &&
+ atomic_inc_return(&active_num_conn) >= server_conf.max_connections) {
+ pr_info_ratelimited("Limit the maximum number of connections(%u)\n",
+ atomic_read(&active_num_conn));
+ atomic_dec(&active_num_conn);
+ sock_release(client_sk);
+ continue;
+ }
+
ksmbd_debug(CONN, "connect success: accepted new connection\n");
client_sk->sk->sk_rcvtimeo = KSMBD_TCP_RECV_TIMEOUT;
client_sk->sk->sk_sndtimeo = KSMBD_TCP_SEND_TIMEOUT;
@@ -295,6 +308,7 @@ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig,
struct msghdr ksmbd_msg;
struct kvec *iov;
struct ksmbd_conn *conn = KSMBD_TRANS(t)->conn;
+ int max_retry = 2;
iov = get_conn_iovec(t, nr_segs);
if (!iov)
@@ -321,9 +335,11 @@ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig,
} else if (conn->status == KSMBD_SESS_NEED_RECONNECT) {
total_read = -EAGAIN;
break;
- } else if (length == -ERESTARTSYS || length == -EAGAIN) {
+ } else if ((length == -ERESTARTSYS || length == -EAGAIN) &&
+ max_retry) {
usleep_range(1000, 2000);
length = 0;
+ max_retry--;
continue;
} else if (length <= 0) {
total_read = -EAGAIN;
@@ -365,6 +381,8 @@ static int ksmbd_tcp_writev(struct ksmbd_transport *t, struct kvec *iov,
static void ksmbd_tcp_disconnect(struct ksmbd_transport *t)
{
free_transport(TCP_TRANS(t));
+ if (server_conf.max_connections)
+ atomic_dec(&active_num_conn);
}
static void tcp_destroy_socket(struct socket *ksmbd_socket)
diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c
index ff0e7a4fcd4d..aa1300b7bfc2 100644
--- a/fs/ksmbd/vfs.c
+++ b/fs/ksmbd/vfs.c
@@ -6,6 +6,7 @@
#include <linux/kernel.h>
#include <linux/fs.h>
+#include <linux/filelock.h>
#include <linux/uaccess.h>
#include <linux/backing-dev.h>
#include <linux/writeback.h>
@@ -69,14 +70,14 @@ static void ksmbd_vfs_inherit_owner(struct ksmbd_work *work,
*
* the reference count of @parent isn't incremented.
*/
-int ksmbd_vfs_lock_parent(struct user_namespace *user_ns, struct dentry *parent,
+int ksmbd_vfs_lock_parent(struct mnt_idmap *idmap, struct dentry *parent,
struct dentry *child)
{
struct dentry *dentry;
int ret = 0;
inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
- dentry = lookup_one(user_ns, child->d_name.name, parent,
+ dentry = lookup_one(idmap, child->d_name.name, parent,
child->d_name.len);
if (IS_ERR(dentry)) {
ret = PTR_ERR(dentry);
@@ -96,20 +97,20 @@ out_err:
return ret;
}
-int ksmbd_vfs_may_delete(struct user_namespace *user_ns,
+int ksmbd_vfs_may_delete(struct mnt_idmap *idmap,
struct dentry *dentry)
{
struct dentry *parent;
int ret;
parent = dget_parent(dentry);
- ret = ksmbd_vfs_lock_parent(user_ns, parent, dentry);
+ ret = ksmbd_vfs_lock_parent(idmap, parent, dentry);
if (ret) {
dput(parent);
return ret;
}
- ret = inode_permission(user_ns, d_inode(parent),
+ ret = inode_permission(idmap, d_inode(parent),
MAY_EXEC | MAY_WRITE);
inode_unlock(d_inode(parent));
@@ -117,7 +118,7 @@ int ksmbd_vfs_may_delete(struct user_namespace *user_ns,
return ret;
}
-int ksmbd_vfs_query_maximal_access(struct user_namespace *user_ns,
+int ksmbd_vfs_query_maximal_access(struct mnt_idmap *idmap,
struct dentry *dentry, __le32 *daccess)
{
struct dentry *parent;
@@ -125,26 +126,26 @@ int ksmbd_vfs_query_maximal_access(struct user_namespace *user_ns,
*daccess = cpu_to_le32(FILE_READ_ATTRIBUTES | READ_CONTROL);
- if (!inode_permission(user_ns, d_inode(dentry), MAY_OPEN | MAY_WRITE))
+ if (!inode_permission(idmap, d_inode(dentry), MAY_OPEN | MAY_WRITE))
*daccess |= cpu_to_le32(WRITE_DAC | WRITE_OWNER | SYNCHRONIZE |
FILE_WRITE_DATA | FILE_APPEND_DATA |
FILE_WRITE_EA | FILE_WRITE_ATTRIBUTES |
FILE_DELETE_CHILD);
- if (!inode_permission(user_ns, d_inode(dentry), MAY_OPEN | MAY_READ))
+ if (!inode_permission(idmap, d_inode(dentry), MAY_OPEN | MAY_READ))
*daccess |= FILE_READ_DATA_LE | FILE_READ_EA_LE;
- if (!inode_permission(user_ns, d_inode(dentry), MAY_OPEN | MAY_EXEC))
+ if (!inode_permission(idmap, d_inode(dentry), MAY_OPEN | MAY_EXEC))
*daccess |= FILE_EXECUTE_LE;
parent = dget_parent(dentry);
- ret = ksmbd_vfs_lock_parent(user_ns, parent, dentry);
+ ret = ksmbd_vfs_lock_parent(idmap, parent, dentry);
if (ret) {
dput(parent);
return ret;
}
- if (!inode_permission(user_ns, d_inode(parent), MAY_EXEC | MAY_WRITE))
+ if (!inode_permission(idmap, d_inode(parent), MAY_EXEC | MAY_WRITE))
*daccess |= FILE_DELETE_LE;
inode_unlock(d_inode(parent));
@@ -177,7 +178,7 @@ int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode)
}
mode |= S_IFREG;
- err = vfs_create(mnt_user_ns(path.mnt), d_inode(path.dentry),
+ err = vfs_create(mnt_idmap(path.mnt), d_inode(path.dentry),
dentry, mode, true);
if (!err) {
ksmbd_vfs_inherit_owner(work, d_inode(path.dentry),
@@ -199,7 +200,7 @@ int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode)
*/
int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode)
{
- struct user_namespace *user_ns;
+ struct mnt_idmap *idmap;
struct path path;
struct dentry *dentry;
int err;
@@ -215,15 +216,15 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode)
return err;
}
- user_ns = mnt_user_ns(path.mnt);
+ idmap = mnt_idmap(path.mnt);
mode |= S_IFDIR;
- err = vfs_mkdir(user_ns, d_inode(path.dentry), dentry, mode);
+ err = vfs_mkdir(idmap, d_inode(path.dentry), dentry, mode);
if (err) {
goto out;
} else if (d_unhashed(dentry)) {
struct dentry *d;
- d = lookup_one(user_ns, dentry->d_name.name, dentry->d_parent,
+ d = lookup_one(idmap, dentry->d_name.name, dentry->d_parent,
dentry->d_name.len);
if (IS_ERR(d)) {
err = PTR_ERR(d);
@@ -245,7 +246,7 @@ out:
return err;
}
-static ssize_t ksmbd_vfs_getcasexattr(struct user_namespace *user_ns,
+static ssize_t ksmbd_vfs_getcasexattr(struct mnt_idmap *idmap,
struct dentry *dentry, char *attr_name,
int attr_name_len, char **attr_value)
{
@@ -262,7 +263,7 @@ static ssize_t ksmbd_vfs_getcasexattr(struct user_namespace *user_ns,
if (strncasecmp(attr_name, name, attr_name_len))
continue;
- value_len = ksmbd_vfs_getxattr(user_ns,
+ value_len = ksmbd_vfs_getxattr(idmap,
dentry,
name,
attr_value);
@@ -285,7 +286,7 @@ static int ksmbd_vfs_stream_read(struct ksmbd_file *fp, char *buf, loff_t *pos,
ksmbd_debug(VFS, "read stream data pos : %llu, count : %zd\n",
*pos, count);
- v_len = ksmbd_vfs_getcasexattr(file_mnt_user_ns(fp->filp),
+ v_len = ksmbd_vfs_getcasexattr(file_mnt_idmap(fp->filp),
fp->filp->f_path.dentry,
fp->stream.name,
fp->stream.size,
@@ -409,7 +410,7 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos,
size_t count)
{
char *stream_buf = NULL, *wbuf;
- struct user_namespace *user_ns = file_mnt_user_ns(fp->filp);
+ struct mnt_idmap *idmap = file_mnt_idmap(fp->filp);
size_t size, v_len;
int err = 0;
@@ -422,7 +423,7 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos,
count = (*pos + count) - XATTR_SIZE_MAX;
}
- v_len = ksmbd_vfs_getcasexattr(user_ns,
+ v_len = ksmbd_vfs_getcasexattr(idmap,
fp->filp->f_path.dentry,
fp->stream.name,
fp->stream.size,
@@ -448,7 +449,7 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos,
memcpy(&stream_buf[*pos], buf, count);
- err = ksmbd_vfs_setxattr(user_ns,
+ err = ksmbd_vfs_setxattr(idmap,
fp->filp->f_path.dentry,
fp->stream.name,
(void *)stream_buf,
@@ -583,7 +584,7 @@ int ksmbd_vfs_fsync(struct ksmbd_work *work, u64 fid, u64 p_id)
*/
int ksmbd_vfs_remove_file(struct ksmbd_work *work, char *name)
{
- struct user_namespace *user_ns;
+ struct mnt_idmap *idmap;
struct path path;
struct dentry *parent;
int err;
@@ -598,9 +599,9 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, char *name)
return err;
}
- user_ns = mnt_user_ns(path.mnt);
+ idmap = mnt_idmap(path.mnt);
parent = dget_parent(path.dentry);
- err = ksmbd_vfs_lock_parent(user_ns, parent, path.dentry);
+ err = ksmbd_vfs_lock_parent(idmap, parent, path.dentry);
if (err) {
dput(parent);
path_put(&path);
@@ -614,12 +615,12 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, char *name)
}
if (S_ISDIR(d_inode(path.dentry)->i_mode)) {
- err = vfs_rmdir(user_ns, d_inode(parent), path.dentry);
+ err = vfs_rmdir(idmap, d_inode(parent), path.dentry);
if (err && err != -ENOTEMPTY)
ksmbd_debug(VFS, "%s: rmdir failed, err %d\n", name,
err);
} else {
- err = vfs_unlink(user_ns, d_inode(parent), path.dentry, NULL);
+ err = vfs_unlink(idmap, d_inode(parent), path.dentry, NULL);
if (err)
ksmbd_debug(VFS, "%s: unlink failed, err %d\n", name,
err);
@@ -672,7 +673,7 @@ int ksmbd_vfs_link(struct ksmbd_work *work, const char *oldname,
goto out3;
}
- err = vfs_link(oldpath.dentry, mnt_user_ns(newpath.mnt),
+ err = vfs_link(oldpath.dentry, mnt_idmap(newpath.mnt),
d_inode(newpath.dentry),
dentry, NULL);
if (err)
@@ -711,10 +712,10 @@ static int ksmbd_validate_entry_in_use(struct dentry *src_dent)
}
static int __ksmbd_vfs_rename(struct ksmbd_work *work,
- struct user_namespace *src_user_ns,
+ struct mnt_idmap *src_idmap,
struct dentry *src_dent_parent,
struct dentry *src_dent,
- struct user_namespace *dst_user_ns,
+ struct mnt_idmap *dst_idmap,
struct dentry *dst_dent_parent,
struct dentry *trap_dent,
char *dst_name)
@@ -740,8 +741,8 @@ static int __ksmbd_vfs_rename(struct ksmbd_work *work,
if (ksmbd_override_fsids(work))
return -ENOMEM;
- dst_dent = lookup_one(dst_user_ns, dst_name, dst_dent_parent,
- strlen(dst_name));
+ dst_dent = lookup_one(dst_idmap, dst_name,
+ dst_dent_parent, strlen(dst_name));
err = PTR_ERR(dst_dent);
if (IS_ERR(dst_dent)) {
pr_err("lookup failed %s [%d]\n", dst_name, err);
@@ -751,10 +752,10 @@ static int __ksmbd_vfs_rename(struct ksmbd_work *work,
err = -ENOTEMPTY;
if (dst_dent != trap_dent && !d_really_is_positive(dst_dent)) {
struct renamedata rd = {
- .old_mnt_userns = src_user_ns,
+ .old_mnt_idmap = src_idmap,
.old_dir = d_inode(src_dent_parent),
.old_dentry = src_dent,
- .new_mnt_userns = dst_user_ns,
+ .new_mnt_idmap = dst_idmap,
.new_dir = d_inode(dst_dent_parent),
.new_dentry = dst_dent,
};
@@ -772,7 +773,7 @@ out:
int ksmbd_vfs_fp_rename(struct ksmbd_work *work, struct ksmbd_file *fp,
char *newname)
{
- struct user_namespace *user_ns;
+ struct mnt_idmap *idmap;
struct path dst_path;
struct dentry *src_dent_parent, *dst_dent_parent;
struct dentry *src_dent, *trap_dent, *src_child;
@@ -800,8 +801,8 @@ int ksmbd_vfs_fp_rename(struct ksmbd_work *work, struct ksmbd_file *fp,
trap_dent = lock_rename(src_dent_parent, dst_dent_parent);
dget(src_dent);
dget(dst_dent_parent);
- user_ns = file_mnt_user_ns(fp->filp);
- src_child = lookup_one(user_ns, src_dent->d_name.name, src_dent_parent,
+ idmap = file_mnt_idmap(fp->filp);
+ src_child = lookup_one(idmap, src_dent->d_name.name, src_dent_parent,
src_dent->d_name.len);
if (IS_ERR(src_child)) {
err = PTR_ERR(src_child);
@@ -816,10 +817,10 @@ int ksmbd_vfs_fp_rename(struct ksmbd_work *work, struct ksmbd_file *fp,
dput(src_child);
err = __ksmbd_vfs_rename(work,
- user_ns,
+ idmap,
src_dent_parent,
src_dent,
- mnt_user_ns(dst_path.mnt),
+ mnt_idmap(dst_path.mnt),
dst_dent_parent,
trap_dent,
dst_name);
@@ -907,22 +908,22 @@ ssize_t ksmbd_vfs_listxattr(struct dentry *dentry, char **list)
return size;
}
-static ssize_t ksmbd_vfs_xattr_len(struct user_namespace *user_ns,
+static ssize_t ksmbd_vfs_xattr_len(struct mnt_idmap *idmap,
struct dentry *dentry, char *xattr_name)
{
- return vfs_getxattr(user_ns, dentry, xattr_name, NULL, 0);
+ return vfs_getxattr(idmap, dentry, xattr_name, NULL, 0);
}
/**
* ksmbd_vfs_getxattr() - vfs helper for smb get extended attributes value
- * @user_ns: user namespace
+ * @idmap: idmap
* @dentry: dentry of file for getting xattrs
* @xattr_name: name of xattr name to query
* @xattr_buf: destination buffer xattr value
*
* Return: read xattr value length on success, otherwise error
*/
-ssize_t ksmbd_vfs_getxattr(struct user_namespace *user_ns,
+ssize_t ksmbd_vfs_getxattr(struct mnt_idmap *idmap,
struct dentry *dentry,
char *xattr_name, char **xattr_buf)
{
@@ -930,7 +931,7 @@ ssize_t ksmbd_vfs_getxattr(struct user_namespace *user_ns,
char *buf;
*xattr_buf = NULL;
- xattr_len = ksmbd_vfs_xattr_len(user_ns, dentry, xattr_name);
+ xattr_len = ksmbd_vfs_xattr_len(idmap, dentry, xattr_name);
if (xattr_len < 0)
return xattr_len;
@@ -938,7 +939,7 @@ ssize_t ksmbd_vfs_getxattr(struct user_namespace *user_ns,
if (!buf)
return -ENOMEM;
- xattr_len = vfs_getxattr(user_ns, dentry, xattr_name,
+ xattr_len = vfs_getxattr(idmap, dentry, xattr_name,
(void *)buf, xattr_len);
if (xattr_len > 0)
*xattr_buf = buf;
@@ -949,7 +950,7 @@ ssize_t ksmbd_vfs_getxattr(struct user_namespace *user_ns,
/**
* ksmbd_vfs_setxattr() - vfs helper for smb set extended attributes value
- * @user_ns: user namespace
+ * @idmap: idmap of the relevant mount
* @dentry: dentry to set XATTR at
* @name: xattr name for setxattr
* @value: xattr value to set
@@ -958,13 +959,13 @@ ssize_t ksmbd_vfs_getxattr(struct user_namespace *user_ns,
*
* Return: 0 on success, otherwise error
*/
-int ksmbd_vfs_setxattr(struct user_namespace *user_ns,
+int ksmbd_vfs_setxattr(struct mnt_idmap *idmap,
struct dentry *dentry, const char *attr_name,
void *attr_value, size_t attr_size, int flags)
{
int err;
- err = vfs_setxattr(user_ns,
+ err = vfs_setxattr(idmap,
dentry,
attr_name,
attr_value,
@@ -1074,26 +1075,26 @@ int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length,
return ret;
}
-int ksmbd_vfs_remove_xattr(struct user_namespace *user_ns,
+int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap,
struct dentry *dentry, char *attr_name)
{
- return vfs_removexattr(user_ns, dentry, attr_name);
+ return vfs_removexattr(idmap, dentry, attr_name);
}
-int ksmbd_vfs_unlink(struct user_namespace *user_ns,
+int ksmbd_vfs_unlink(struct mnt_idmap *idmap,
struct dentry *dir, struct dentry *dentry)
{
int err = 0;
- err = ksmbd_vfs_lock_parent(user_ns, dir, dentry);
+ err = ksmbd_vfs_lock_parent(idmap, dir, dentry);
if (err)
return err;
dget(dentry);
if (S_ISDIR(d_inode(dentry)->i_mode))
- err = vfs_rmdir(user_ns, d_inode(dir), dentry);
+ err = vfs_rmdir(idmap, d_inode(dir), dentry);
else
- err = vfs_unlink(user_ns, d_inode(dir), dentry, NULL);
+ err = vfs_unlink(idmap, d_inode(dir), dentry, NULL);
dput(dentry);
inode_unlock(d_inode(dir));
@@ -1298,7 +1299,7 @@ struct dentry *ksmbd_vfs_kern_path_create(struct ksmbd_work *work,
return dent;
}
-int ksmbd_vfs_remove_acl_xattrs(struct user_namespace *user_ns,
+int ksmbd_vfs_remove_acl_xattrs(struct mnt_idmap *idmap,
struct dentry *dentry)
{
char *name, *xattr_list = NULL;
@@ -1321,7 +1322,7 @@ int ksmbd_vfs_remove_acl_xattrs(struct user_namespace *user_ns,
sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1) ||
!strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT,
sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1)) {
- err = vfs_remove_acl(user_ns, dentry, name);
+ err = vfs_remove_acl(idmap, dentry, name);
if (err)
ksmbd_debug(SMB,
"remove acl xattr failed : %s\n", name);
@@ -1332,7 +1333,7 @@ out:
return err;
}
-int ksmbd_vfs_remove_sd_xattrs(struct user_namespace *user_ns,
+int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap,
struct dentry *dentry)
{
char *name, *xattr_list = NULL;
@@ -1352,7 +1353,7 @@ int ksmbd_vfs_remove_sd_xattrs(struct user_namespace *user_ns,
ksmbd_debug(SMB, "%s, len %zd\n", name, strlen(name));
if (!strncmp(name, XATTR_NAME_SD, XATTR_NAME_SD_LEN)) {
- err = ksmbd_vfs_remove_xattr(user_ns, dentry, name);
+ err = ksmbd_vfs_remove_xattr(idmap, dentry, name);
if (err)
ksmbd_debug(SMB, "remove xattr failed : %s\n", name);
}
@@ -1362,7 +1363,7 @@ out:
return err;
}
-static struct xattr_smb_acl *ksmbd_vfs_make_xattr_posix_acl(struct user_namespace *user_ns,
+static struct xattr_smb_acl *ksmbd_vfs_make_xattr_posix_acl(struct mnt_idmap *idmap,
struct inode *inode,
int acl_type)
{
@@ -1392,14 +1393,14 @@ static struct xattr_smb_acl *ksmbd_vfs_make_xattr_posix_acl(struct user_namespac
switch (pa_entry->e_tag) {
case ACL_USER:
xa_entry->type = SMB_ACL_USER;
- xa_entry->uid = posix_acl_uid_translate(user_ns, pa_entry);
+ xa_entry->uid = posix_acl_uid_translate(idmap, pa_entry);
break;
case ACL_USER_OBJ:
xa_entry->type = SMB_ACL_USER_OBJ;
break;
case ACL_GROUP:
xa_entry->type = SMB_ACL_GROUP;
- xa_entry->gid = posix_acl_gid_translate(user_ns, pa_entry);
+ xa_entry->gid = posix_acl_gid_translate(idmap, pa_entry);
break;
case ACL_GROUP_OBJ:
xa_entry->type = SMB_ACL_GROUP_OBJ;
@@ -1428,7 +1429,7 @@ out:
}
int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn,
- struct user_namespace *user_ns,
+ struct mnt_idmap *idmap,
struct dentry *dentry,
struct smb_ntsd *pntsd, int len)
{
@@ -1461,13 +1462,13 @@ int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn,
return rc;
}
- smb_acl = ksmbd_vfs_make_xattr_posix_acl(user_ns, inode,
+ smb_acl = ksmbd_vfs_make_xattr_posix_acl(idmap, inode,
ACL_TYPE_ACCESS);
if (S_ISDIR(inode->i_mode))
- def_smb_acl = ksmbd_vfs_make_xattr_posix_acl(user_ns, inode,
+ def_smb_acl = ksmbd_vfs_make_xattr_posix_acl(idmap, inode,
ACL_TYPE_DEFAULT);
- rc = ndr_encode_posix_acl(&acl_ndr, user_ns, inode,
+ rc = ndr_encode_posix_acl(&acl_ndr, idmap, inode,
smb_acl, def_smb_acl);
if (rc) {
pr_err("failed to encode ndr to posix acl\n");
@@ -1487,7 +1488,7 @@ int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn,
goto out;
}
- rc = ksmbd_vfs_setxattr(user_ns, dentry,
+ rc = ksmbd_vfs_setxattr(idmap, dentry,
XATTR_NAME_SD, sd_ndr.data,
sd_ndr.offset, 0);
if (rc < 0)
@@ -1502,7 +1503,7 @@ out:
}
int ksmbd_vfs_get_sd_xattr(struct ksmbd_conn *conn,
- struct user_namespace *user_ns,
+ struct mnt_idmap *idmap,
struct dentry *dentry,
struct smb_ntsd **pntsd)
{
@@ -1514,7 +1515,7 @@ int ksmbd_vfs_get_sd_xattr(struct ksmbd_conn *conn,
struct xattr_smb_acl *smb_acl = NULL, *def_smb_acl = NULL;
__u8 cmp_hash[XATTR_SD_HASH_SIZE] = {0};
- rc = ksmbd_vfs_getxattr(user_ns, dentry, XATTR_NAME_SD, &n.data);
+ rc = ksmbd_vfs_getxattr(idmap, dentry, XATTR_NAME_SD, &n.data);
if (rc <= 0)
return rc;
@@ -1523,13 +1524,13 @@ int ksmbd_vfs_get_sd_xattr(struct ksmbd_conn *conn,
if (rc)
goto free_n_data;
- smb_acl = ksmbd_vfs_make_xattr_posix_acl(user_ns, inode,
+ smb_acl = ksmbd_vfs_make_xattr_posix_acl(idmap, inode,
ACL_TYPE_ACCESS);
if (S_ISDIR(inode->i_mode))
- def_smb_acl = ksmbd_vfs_make_xattr_posix_acl(user_ns, inode,
+ def_smb_acl = ksmbd_vfs_make_xattr_posix_acl(idmap, inode,
ACL_TYPE_DEFAULT);
- rc = ndr_encode_posix_acl(&acl_ndr, user_ns, inode, smb_acl,
+ rc = ndr_encode_posix_acl(&acl_ndr, idmap, inode, smb_acl,
def_smb_acl);
if (rc) {
pr_err("failed to encode ndr to posix acl\n");
@@ -1576,7 +1577,7 @@ free_n_data:
return rc;
}
-int ksmbd_vfs_set_dos_attrib_xattr(struct user_namespace *user_ns,
+int ksmbd_vfs_set_dos_attrib_xattr(struct mnt_idmap *idmap,
struct dentry *dentry,
struct xattr_dos_attrib *da)
{
@@ -1587,7 +1588,7 @@ int ksmbd_vfs_set_dos_attrib_xattr(struct user_namespace *user_ns,
if (err)
return err;
- err = ksmbd_vfs_setxattr(user_ns, dentry, XATTR_NAME_DOS_ATTRIBUTE,
+ err = ksmbd_vfs_setxattr(idmap, dentry, XATTR_NAME_DOS_ATTRIBUTE,
(void *)n.data, n.offset, 0);
if (err)
ksmbd_debug(SMB, "failed to store dos attribute in xattr\n");
@@ -1596,14 +1597,14 @@ int ksmbd_vfs_set_dos_attrib_xattr(struct user_namespace *user_ns,
return err;
}
-int ksmbd_vfs_get_dos_attrib_xattr(struct user_namespace *user_ns,
+int ksmbd_vfs_get_dos_attrib_xattr(struct mnt_idmap *idmap,
struct dentry *dentry,
struct xattr_dos_attrib *da)
{
struct ndr n;
int err;
- err = ksmbd_vfs_getxattr(user_ns, dentry, XATTR_NAME_DOS_ATTRIBUTE,
+ err = ksmbd_vfs_getxattr(idmap, dentry, XATTR_NAME_DOS_ATTRIBUTE,
(char **)&n.data);
if (err > 0) {
n.length = err;
@@ -1650,14 +1651,14 @@ void *ksmbd_vfs_init_kstat(char **p, struct ksmbd_kstat *ksmbd_kstat)
}
int ksmbd_vfs_fill_dentry_attrs(struct ksmbd_work *work,
- struct user_namespace *user_ns,
+ struct mnt_idmap *idmap,
struct dentry *dentry,
struct ksmbd_kstat *ksmbd_kstat)
{
u64 time;
int rc;
- generic_fillattr(user_ns, d_inode(dentry), ksmbd_kstat->kstat);
+ generic_fillattr(idmap, d_inode(dentry), ksmbd_kstat->kstat);
time = ksmbd_UnixTimeToNT(ksmbd_kstat->kstat->ctime);
ksmbd_kstat->create_time = time;
@@ -1675,7 +1676,7 @@ int ksmbd_vfs_fill_dentry_attrs(struct ksmbd_work *work,
KSMBD_SHARE_FLAG_STORE_DOS_ATTRS)) {
struct xattr_dos_attrib da;
- rc = ksmbd_vfs_get_dos_attrib_xattr(user_ns, dentry, &da);
+ rc = ksmbd_vfs_get_dos_attrib_xattr(idmap, dentry, &da);
if (rc > 0) {
ksmbd_kstat->file_attributes = cpu_to_le32(da.attr);
ksmbd_kstat->create_time = da.create_time;
@@ -1687,7 +1688,7 @@ int ksmbd_vfs_fill_dentry_attrs(struct ksmbd_work *work,
return 0;
}
-ssize_t ksmbd_vfs_casexattr_len(struct user_namespace *user_ns,
+ssize_t ksmbd_vfs_casexattr_len(struct mnt_idmap *idmap,
struct dentry *dentry, char *attr_name,
int attr_name_len)
{
@@ -1704,7 +1705,7 @@ ssize_t ksmbd_vfs_casexattr_len(struct user_namespace *user_ns,
if (strncasecmp(attr_name, name, attr_name_len))
continue;
- value_len = ksmbd_vfs_xattr_len(user_ns, dentry, name);
+ value_len = ksmbd_vfs_xattr_len(idmap, dentry, name);
break;
}
@@ -1823,7 +1824,7 @@ void ksmbd_vfs_posix_lock_unblock(struct file_lock *flock)
locks_delete_block(flock);
}
-int ksmbd_vfs_set_init_posix_acl(struct user_namespace *user_ns,
+int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap,
struct dentry *dentry)
{
struct posix_acl_state acl_state;
@@ -1857,13 +1858,13 @@ int ksmbd_vfs_set_init_posix_acl(struct user_namespace *user_ns,
return -ENOMEM;
}
posix_state_to_acl(&acl_state, acls->a_entries);
- rc = set_posix_acl(user_ns, dentry, ACL_TYPE_ACCESS, acls);
+ rc = set_posix_acl(idmap, dentry, ACL_TYPE_ACCESS, acls);
if (rc < 0)
ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n",
rc);
else if (S_ISDIR(inode->i_mode)) {
posix_state_to_acl(&acl_state, acls->a_entries);
- rc = set_posix_acl(user_ns, dentry, ACL_TYPE_DEFAULT, acls);
+ rc = set_posix_acl(idmap, dentry, ACL_TYPE_DEFAULT, acls);
if (rc < 0)
ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n",
rc);
@@ -1873,7 +1874,7 @@ int ksmbd_vfs_set_init_posix_acl(struct user_namespace *user_ns,
return rc;
}
-int ksmbd_vfs_inherit_posix_acl(struct user_namespace *user_ns,
+int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap,
struct dentry *dentry, struct inode *parent_inode)
{
struct posix_acl *acls;
@@ -1896,12 +1897,12 @@ int ksmbd_vfs_inherit_posix_acl(struct user_namespace *user_ns,
}
}
- rc = set_posix_acl(user_ns, dentry, ACL_TYPE_ACCESS, acls);
+ rc = set_posix_acl(idmap, dentry, ACL_TYPE_ACCESS, acls);
if (rc < 0)
ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n",
rc);
if (S_ISDIR(inode->i_mode)) {
- rc = set_posix_acl(user_ns, dentry, ACL_TYPE_DEFAULT,
+ rc = set_posix_acl(idmap, dentry, ACL_TYPE_DEFAULT,
acls);
if (rc < 0)
ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n",
diff --git a/fs/ksmbd/vfs.h b/fs/ksmbd/vfs.h
index 0d73d735cc39..9d676ab0cd25 100644
--- a/fs/ksmbd/vfs.h
+++ b/fs/ksmbd/vfs.h
@@ -71,10 +71,10 @@ struct ksmbd_kstat {
__le32 file_attributes;
};
-int ksmbd_vfs_lock_parent(struct user_namespace *user_ns, struct dentry *parent,
+int ksmbd_vfs_lock_parent(struct mnt_idmap *idmap, struct dentry *parent,
struct dentry *child);
-int ksmbd_vfs_may_delete(struct user_namespace *user_ns, struct dentry *dentry);
-int ksmbd_vfs_query_maximal_access(struct user_namespace *user_ns,
+int ksmbd_vfs_may_delete(struct mnt_idmap *idmap, struct dentry *dentry);
+int ksmbd_vfs_query_maximal_access(struct mnt_idmap *idmap,
struct dentry *dentry, __le32 *daccess);
int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode);
int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode);
@@ -102,19 +102,19 @@ int ksmbd_vfs_copy_file_ranges(struct ksmbd_work *work,
unsigned int *chunk_size_written,
loff_t *total_size_written);
ssize_t ksmbd_vfs_listxattr(struct dentry *dentry, char **list);
-ssize_t ksmbd_vfs_getxattr(struct user_namespace *user_ns,
+ssize_t ksmbd_vfs_getxattr(struct mnt_idmap *idmap,
struct dentry *dentry,
char *xattr_name,
char **xattr_buf);
-ssize_t ksmbd_vfs_casexattr_len(struct user_namespace *user_ns,
+ssize_t ksmbd_vfs_casexattr_len(struct mnt_idmap *idmap,
struct dentry *dentry, char *attr_name,
int attr_name_len);
-int ksmbd_vfs_setxattr(struct user_namespace *user_ns,
+int ksmbd_vfs_setxattr(struct mnt_idmap *idmap,
struct dentry *dentry, const char *attr_name,
void *attr_value, size_t attr_size, int flags);
int ksmbd_vfs_xattr_stream_name(char *stream_name, char **xattr_stream_name,
size_t *xattr_stream_name_size, int s_type);
-int ksmbd_vfs_remove_xattr(struct user_namespace *user_ns,
+int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap,
struct dentry *dentry, char *attr_name);
int ksmbd_vfs_kern_path(struct ksmbd_work *work,
char *name, unsigned int flags, struct path *path,
@@ -131,37 +131,37 @@ struct file_allocated_range_buffer;
int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length,
struct file_allocated_range_buffer *ranges,
unsigned int in_count, unsigned int *out_count);
-int ksmbd_vfs_unlink(struct user_namespace *user_ns,
- struct dentry *dir, struct dentry *dentry);
+int ksmbd_vfs_unlink(struct mnt_idmap *idmap, struct dentry *dir,
+ struct dentry *dentry);
void *ksmbd_vfs_init_kstat(char **p, struct ksmbd_kstat *ksmbd_kstat);
int ksmbd_vfs_fill_dentry_attrs(struct ksmbd_work *work,
- struct user_namespace *user_ns,
+ struct mnt_idmap *idmap,
struct dentry *dentry,
struct ksmbd_kstat *ksmbd_kstat);
void ksmbd_vfs_posix_lock_wait(struct file_lock *flock);
int ksmbd_vfs_posix_lock_wait_timeout(struct file_lock *flock, long timeout);
void ksmbd_vfs_posix_lock_unblock(struct file_lock *flock);
-int ksmbd_vfs_remove_acl_xattrs(struct user_namespace *user_ns,
+int ksmbd_vfs_remove_acl_xattrs(struct mnt_idmap *idmap,
struct dentry *dentry);
-int ksmbd_vfs_remove_sd_xattrs(struct user_namespace *user_ns,
+int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap,
struct dentry *dentry);
int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn,
- struct user_namespace *user_ns,
+ struct mnt_idmap *idmap,
struct dentry *dentry,
struct smb_ntsd *pntsd, int len);
int ksmbd_vfs_get_sd_xattr(struct ksmbd_conn *conn,
- struct user_namespace *user_ns,
+ struct mnt_idmap *idmap,
struct dentry *dentry,
struct smb_ntsd **pntsd);
-int ksmbd_vfs_set_dos_attrib_xattr(struct user_namespace *user_ns,
+int ksmbd_vfs_set_dos_attrib_xattr(struct mnt_idmap *idmap,
struct dentry *dentry,
struct xattr_dos_attrib *da);
-int ksmbd_vfs_get_dos_attrib_xattr(struct user_namespace *user_ns,
+int ksmbd_vfs_get_dos_attrib_xattr(struct mnt_idmap *idmap,
struct dentry *dentry,
struct xattr_dos_attrib *da);
-int ksmbd_vfs_set_init_posix_acl(struct user_namespace *user_ns,
+int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap,
struct dentry *dentry);
-int ksmbd_vfs_inherit_posix_acl(struct user_namespace *user_ns,
+int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap,
struct dentry *dentry,
struct inode *parent_inode);
#endif /* __KSMBD_VFS_H__ */
diff --git a/fs/ksmbd/vfs_cache.c b/fs/ksmbd/vfs_cache.c
index da9163b00350..1d8126443a7f 100644
--- a/fs/ksmbd/vfs_cache.c
+++ b/fs/ksmbd/vfs_cache.c
@@ -5,6 +5,7 @@
*/
#include <linux/fs.h>
+#include <linux/filelock.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
@@ -251,7 +252,7 @@ static void __ksmbd_inode_close(struct ksmbd_file *fp)
filp = fp->filp;
if (ksmbd_stream_fd(fp) && (ci->m_flags & S_DEL_ON_CLS_STREAM)) {
ci->m_flags &= ~S_DEL_ON_CLS_STREAM;
- err = ksmbd_vfs_remove_xattr(file_mnt_user_ns(filp),
+ err = ksmbd_vfs_remove_xattr(file_mnt_idmap(filp),
filp->f_path.dentry,
fp->stream.name);
if (err)
@@ -266,7 +267,7 @@ static void __ksmbd_inode_close(struct ksmbd_file *fp)
dir = dentry->d_parent;
ci->m_flags &= ~(S_DEL_ON_CLS | S_DEL_PENDING);
write_unlock(&ci->m_lock);
- ksmbd_vfs_unlink(file_mnt_user_ns(filp), dir, dentry);
+ ksmbd_vfs_unlink(file_mnt_idmap(filp), dir, dentry);
write_lock(&ci->m_lock);
}
write_unlock(&ci->m_lock);
diff --git a/fs/libfs.c b/fs/libfs.c
index aada4e7c8713..4eda519c3002 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -28,12 +28,12 @@
#include "internal.h"
-int simple_getattr(struct user_namespace *mnt_userns, const struct path *path,
+int simple_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask,
unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
- generic_fillattr(&init_user_ns, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, inode, stat);
stat->blocks = inode->i_mapping->nrpages << (PAGE_SHIFT - 9);
return 0;
}
@@ -473,7 +473,7 @@ int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry,
}
EXPORT_SYMBOL_GPL(simple_rename_exchange);
-int simple_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+int simple_rename(struct mnt_idmap *idmap, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags)
{
@@ -509,7 +509,7 @@ EXPORT_SYMBOL(simple_rename);
/**
* simple_setattr - setattr for simple filesystem
- * @mnt_userns: user namespace of the target mount
+ * @idmap: idmap of the target mount
* @dentry: dentry
* @iattr: iattr structure
*
@@ -522,19 +522,19 @@ EXPORT_SYMBOL(simple_rename);
* on simple regular filesystems. Anything that needs to change on-disk
* or wire state on size changes needs its own setattr method.
*/
-int simple_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int simple_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *iattr)
{
struct inode *inode = d_inode(dentry);
int error;
- error = setattr_prepare(mnt_userns, dentry, iattr);
+ error = setattr_prepare(idmap, dentry, iattr);
if (error)
return error;
if (iattr->ia_valid & ATTR_SIZE)
truncate_setsize(inode, iattr->ia_size);
- setattr_copy(mnt_userns, inode, iattr);
+ setattr_copy(idmap, inode, iattr);
mark_inode_dirty(inode);
return 0;
}
@@ -1315,16 +1315,16 @@ static struct dentry *empty_dir_lookup(struct inode *dir, struct dentry *dentry,
return ERR_PTR(-ENOENT);
}
-static int empty_dir_getattr(struct user_namespace *mnt_userns,
+static int empty_dir_getattr(struct mnt_idmap *idmap,
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
- generic_fillattr(&init_user_ns, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, inode, stat);
return 0;
}
-static int empty_dir_setattr(struct user_namespace *mnt_userns,
+static int empty_dir_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *attr)
{
return -EPERM;
@@ -1582,3 +1582,39 @@ bool inode_maybe_inc_iversion(struct inode *inode, bool force)
return true;
}
EXPORT_SYMBOL(inode_maybe_inc_iversion);
+
+/**
+ * inode_query_iversion - read i_version for later use
+ * @inode: inode from which i_version should be read
+ *
+ * Read the inode i_version counter. This should be used by callers that wish
+ * to store the returned i_version for later comparison. This will guarantee
+ * that a later query of the i_version will result in a different value if
+ * anything has changed.
+ *
+ * In this implementation, we fetch the current value, set the QUERIED flag and
+ * then try to swap it into place with a cmpxchg, if it wasn't already set. If
+ * that fails, we try again with the newly fetched value from the cmpxchg.
+ */
+u64 inode_query_iversion(struct inode *inode)
+{
+ u64 cur, new;
+
+ cur = inode_peek_iversion_raw(inode);
+ do {
+ /* If flag is already set, then no need to swap */
+ if (cur & I_VERSION_QUERIED) {
+ /*
+ * This barrier (and the implicit barrier in the
+ * cmpxchg below) pairs with the barrier in
+ * inode_maybe_inc_iversion().
+ */
+ smp_mb();
+ break;
+ }
+
+ new = cur | I_VERSION_QUERIED;
+ } while (!atomic64_try_cmpxchg(&inode->i_version, &cur, new));
+ return cur >> I_VERSION_QUERIED_SHIFT;
+}
+EXPORT_SYMBOL(inode_query_iversion);
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index a5bb3f721a9d..82b19a30e0f0 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -188,7 +188,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock)
continue;
if (!rpc_cmp_addr(nlm_addr(block->b_host), addr))
continue;
- if (nfs_compare_fh(NFS_FH(locks_inode(fl_blocked->fl_file)), fh) != 0)
+ if (nfs_compare_fh(NFS_FH(file_inode(fl_blocked->fl_file)), fh) != 0)
continue;
/* Alright, we found a lock. Set the return status
* and wake up the caller
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 99fffc9cb958..16b4de868cd2 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -12,6 +12,7 @@
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/fs.h>
+#include <linux/filelock.h>
#include <linux/nfs_fs.h>
#include <linux/utsname.h>
#include <linux/freezer.h>
@@ -130,7 +131,7 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl)
char *nodename = req->a_host->h_rpcclnt->cl_nodename;
nlmclnt_next_cookie(&argp->cookie);
- memcpy(&lock->fh, NFS_FH(locks_inode(fl->fl_file)), sizeof(struct nfs_fh));
+ memcpy(&lock->fh, NFS_FH(file_inode(fl->fl_file)), sizeof(struct nfs_fh));
lock->caller = nodename;
lock->oh.data = req->a_owner;
lock->oh.len = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s",
diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h
index 5bec78c8e431..17432c445fe6 100644
--- a/fs/lockd/netns.h
+++ b/fs/lockd/netns.h
@@ -3,6 +3,7 @@
#define __LOCKD_NETNS_H__
#include <linux/fs.h>
+#include <linux/filelock.h>
#include <net/netns/generic.h>
struct lockd_net {
diff --git a/fs/locks.c b/fs/locks.c
index 8f01bee17715..624c6ac92ede 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -52,6 +52,7 @@
#include <linux/capability.h>
#include <linux/file.h>
#include <linux/fdtable.h>
+#include <linux/filelock.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/security.h>
@@ -233,7 +234,7 @@ locks_check_ctx_file_list(struct file *filp, struct list_head *list,
char *list_type)
{
struct file_lock *fl;
- struct inode *inode = locks_inode(filp);
+ struct inode *inode = file_inode(filp);
list_for_each_entry(fl, list, fl_list)
if (fl->fl_file == filp)
@@ -887,7 +888,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
{
struct file_lock *cfl;
struct file_lock_context *ctx;
- struct inode *inode = locks_inode(filp);
+ struct inode *inode = file_inode(filp);
void *owner;
void (*func)(void);
@@ -1330,7 +1331,7 @@ retry:
int posix_lock_file(struct file *filp, struct file_lock *fl,
struct file_lock *conflock)
{
- return posix_lock_inode(locks_inode(filp), fl, conflock);
+ return posix_lock_inode(file_inode(filp), fl, conflock);
}
EXPORT_SYMBOL(posix_lock_file);
@@ -1629,7 +1630,7 @@ EXPORT_SYMBOL(lease_get_mtime);
int fcntl_getlease(struct file *filp)
{
struct file_lock *fl;
- struct inode *inode = locks_inode(filp);
+ struct inode *inode = file_inode(filp);
struct file_lock_context *ctx;
int type = F_UNLCK;
LIST_HEAD(dispose);
@@ -1667,7 +1668,7 @@ int fcntl_getlease(struct file *filp)
static int
check_conflicting_open(struct file *filp, const long arg, int flags)
{
- struct inode *inode = locks_inode(filp);
+ struct inode *inode = file_inode(filp);
int self_wcount = 0, self_rcount = 0;
if (flags & FL_LAYOUT)
@@ -1703,7 +1704,7 @@ static int
generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **priv)
{
struct file_lock *fl, *my_fl = NULL, *lease;
- struct inode *inode = locks_inode(filp);
+ struct inode *inode = file_inode(filp);
struct file_lock_context *ctx;
bool is_deleg = (*flp)->fl_flags & FL_DELEG;
int error;
@@ -1819,7 +1820,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
{
int error = -EAGAIN;
struct file_lock *fl, *victim = NULL;
- struct inode *inode = locks_inode(filp);
+ struct inode *inode = file_inode(filp);
struct file_lock_context *ctx;
LIST_HEAD(dispose);
@@ -1861,7 +1862,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
int generic_setlease(struct file *filp, long arg, struct file_lock **flp,
void **priv)
{
- struct inode *inode = locks_inode(filp);
+ struct inode *inode = file_inode(filp);
int error;
if ((!uid_eq(current_fsuid(), inode->i_uid)) && !capable(CAP_LEASE))
@@ -2350,7 +2351,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
struct flock *flock)
{
struct file_lock *file_lock = locks_alloc_lock();
- struct inode *inode = locks_inode(filp);
+ struct inode *inode = file_inode(filp);
struct file *f;
int error;
@@ -2554,7 +2555,7 @@ out:
void locks_remove_posix(struct file *filp, fl_owner_t owner)
{
int error;
- struct inode *inode = locks_inode(filp);
+ struct inode *inode = file_inode(filp);
struct file_lock lock;
struct file_lock_context *ctx;
@@ -2591,7 +2592,7 @@ static void
locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
{
struct file_lock fl;
- struct inode *inode = locks_inode(filp);
+ struct inode *inode = file_inode(filp);
if (list_empty(&flctx->flc_flock))
return;
@@ -2636,7 +2637,7 @@ void locks_remove_file(struct file *filp)
{
struct file_lock_context *ctx;
- ctx = locks_inode_context(locks_inode(filp));
+ ctx = locks_inode_context(file_inode(filp));
if (!ctx)
return;
@@ -2720,7 +2721,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
*/
if (fl->fl_file != NULL)
- inode = locks_inode(fl->fl_file);
+ inode = file_inode(fl->fl_file);
seq_printf(f, "%lld: ", id);
@@ -2861,7 +2862,7 @@ static void __show_fd_locks(struct seq_file *f,
void show_fd_locks(struct seq_file *f,
struct file *filp, struct files_struct *files)
{
- struct inode *inode = locks_inode(filp);
+ struct inode *inode = file_inode(filp);
struct file_lock_context *ctx;
int id = 0;
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c
index 9115948c624e..724d8191a310 100644
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -252,7 +252,7 @@ struct inode *minix_new_inode(const struct inode *dir, umode_t mode, int *error)
iput(inode);
return NULL;
}
- inode_init_owner(&init_user_ns, inode, dir, mode);
+ inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
inode->i_ino = j;
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
inode->i_blocks = 0;
diff --git a/fs/minix/file.c b/fs/minix/file.c
index 6a7bd2d9eec0..0dd05d47724a 100644
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -22,13 +22,13 @@ const struct file_operations minix_file_operations = {
.splice_read = generic_file_splice_read,
};
-static int minix_setattr(struct user_namespace *mnt_userns,
+static int minix_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
int error;
- error = setattr_prepare(&init_user_ns, dentry, attr);
+ error = setattr_prepare(&nop_mnt_idmap, dentry, attr);
if (error)
return error;
@@ -42,7 +42,7 @@ static int minix_setattr(struct user_namespace *mnt_userns,
minix_truncate(inode);
}
- setattr_copy(&init_user_ns, inode, attr);
+ setattr_copy(&nop_mnt_idmap, inode, attr);
mark_inode_dirty(inode);
return 0;
}
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index da8bdd1712a7..e9fbb5303a22 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -654,13 +654,13 @@ static int minix_write_inode(struct inode *inode, struct writeback_control *wbc)
return err;
}
-int minix_getattr(struct user_namespace *mnt_userns, const struct path *path,
+int minix_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int flags)
{
struct super_block *sb = path->dentry->d_sb;
struct inode *inode = d_inode(path->dentry);
- generic_fillattr(&init_user_ns, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, inode, stat);
if (INODE_VERSION(inode) == MINIX_V1)
stat->blocks = (BLOCK_SIZE / 512) * V1_minix_blocks(stat->size, sb);
else
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index 202173368025..e0b76defa85c 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -51,7 +51,7 @@ extern unsigned long minix_count_free_inodes(struct super_block *sb);
extern int minix_new_block(struct inode * inode);
extern void minix_free_block(struct inode *inode, unsigned long block);
extern unsigned long minix_count_free_blocks(struct super_block *sb);
-extern int minix_getattr(struct user_namespace *, const struct path *,
+extern int minix_getattr(struct mnt_idmap *, const struct path *,
struct kstat *, u32, unsigned int);
extern int minix_prepare_chunk(struct page *page, loff_t pos, unsigned len);
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 8afdc408ca4f..39ebe10d6a8b 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -33,7 +33,7 @@ static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, un
return d_splice_alias(inode, dentry);
}
-static int minix_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+static int minix_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t rdev)
{
int error;
@@ -52,7 +52,7 @@ static int minix_mknod(struct user_namespace *mnt_userns, struct inode *dir,
return error;
}
-static int minix_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+static int minix_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
struct file *file, umode_t mode)
{
int error;
@@ -65,13 +65,13 @@ static int minix_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
return finish_open_simple(file, error);
}
-static int minix_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int minix_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
- return minix_mknod(mnt_userns, dir, dentry, mode, 0);
+ return minix_mknod(&nop_mnt_idmap, dir, dentry, mode, 0);
}
-static int minix_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+static int minix_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *symname)
{
int err = -ENAMETOOLONG;
@@ -111,7 +111,7 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir,
return add_nondir(dentry, inode);
}
-static int minix_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int minix_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
struct inode * inode;
@@ -184,7 +184,7 @@ static int minix_rmdir(struct inode * dir, struct dentry *dentry)
return err;
}
-static int minix_rename(struct user_namespace *mnt_userns,
+static int minix_rename(struct mnt_idmap *idmap,
struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c
new file mode 100644
index 000000000000..4905665c47d0
--- /dev/null
+++ b/fs/mnt_idmapping.c
@@ -0,0 +1,273 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Christian Brauner <brauner@kernel.org> */
+
+#include <linux/cred.h>
+#include <linux/fs.h>
+#include <linux/mnt_idmapping.h>
+#include <linux/slab.h>
+#include <linux/user_namespace.h>
+
+#include "internal.h"
+
+struct mnt_idmap {
+ struct user_namespace *owner;
+ refcount_t count;
+};
+
+/*
+ * Carries the initial idmapping of 0:0:4294967295 which is an identity
+ * mapping. This means that {g,u}id 0 is mapped to {g,u}id 0, {g,u}id 1 is
+ * mapped to {g,u}id 1, [...], {g,u}id 1000 to {g,u}id 1000, [...].
+ */
+struct mnt_idmap nop_mnt_idmap = {
+ .owner = &init_user_ns,
+ .count = REFCOUNT_INIT(1),
+};
+EXPORT_SYMBOL_GPL(nop_mnt_idmap);
+
+/**
+ * check_fsmapping - check whether an mount idmapping is allowed
+ * @idmap: idmap of the relevent mount
+ * @sb: super block of the filesystem
+ *
+ * Return: true if @idmap is allowed, false if not.
+ */
+bool check_fsmapping(const struct mnt_idmap *idmap,
+ const struct super_block *sb)
+{
+ return idmap->owner != sb->s_user_ns;
+}
+
+/**
+ * initial_idmapping - check whether this is the initial mapping
+ * @ns: idmapping to check
+ *
+ * Check whether this is the initial mapping, mapping 0 to 0, 1 to 1,
+ * [...], 1000 to 1000 [...].
+ *
+ * Return: true if this is the initial mapping, false if not.
+ */
+static inline bool initial_idmapping(const struct user_namespace *ns)
+{
+ return ns == &init_user_ns;
+}
+
+/**
+ * no_idmapping - check whether we can skip remapping a kuid/gid
+ * @mnt_userns: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
+ *
+ * This function can be used to check whether a remapping between two
+ * idmappings is required.
+ * An idmapped mount is a mount that has an idmapping attached to it that
+ * is different from the filsystem's idmapping and the initial idmapping.
+ * If the initial mapping is used or the idmapping of the mount and the
+ * filesystem are identical no remapping is required.
+ *
+ * Return: true if remapping can be skipped, false if not.
+ */
+static inline bool no_idmapping(const struct user_namespace *mnt_userns,
+ const struct user_namespace *fs_userns)
+{
+ return initial_idmapping(mnt_userns) || mnt_userns == fs_userns;
+}
+
+/**
+ * make_vfsuid - map a filesystem kuid according to an idmapping
+ * @idmap: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
+ * @kuid : kuid to be mapped
+ *
+ * Take a @kuid and remap it from @fs_userns into @idmap. Use this
+ * function when preparing a @kuid to be reported to userspace.
+ *
+ * If no_idmapping() determines that this is not an idmapped mount we can
+ * simply return @kuid unchanged.
+ * If initial_idmapping() tells us that the filesystem is not mounted with an
+ * idmapping we know the value of @kuid won't change when calling
+ * from_kuid() so we can simply retrieve the value via __kuid_val()
+ * directly.
+ *
+ * Return: @kuid mapped according to @idmap.
+ * If @kuid has no mapping in either @idmap or @fs_userns INVALID_UID is
+ * returned.
+ */
+
+vfsuid_t make_vfsuid(struct mnt_idmap *idmap,
+ struct user_namespace *fs_userns,
+ kuid_t kuid)
+{
+ uid_t uid;
+ struct user_namespace *mnt_userns = idmap->owner;
+
+ if (no_idmapping(mnt_userns, fs_userns))
+ return VFSUIDT_INIT(kuid);
+ if (initial_idmapping(fs_userns))
+ uid = __kuid_val(kuid);
+ else
+ uid = from_kuid(fs_userns, kuid);
+ if (uid == (uid_t)-1)
+ return INVALID_VFSUID;
+ return VFSUIDT_INIT(make_kuid(mnt_userns, uid));
+}
+EXPORT_SYMBOL_GPL(make_vfsuid);
+
+/**
+ * make_vfsgid - map a filesystem kgid according to an idmapping
+ * @idmap: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
+ * @kgid : kgid to be mapped
+ *
+ * Take a @kgid and remap it from @fs_userns into @idmap. Use this
+ * function when preparing a @kgid to be reported to userspace.
+ *
+ * If no_idmapping() determines that this is not an idmapped mount we can
+ * simply return @kgid unchanged.
+ * If initial_idmapping() tells us that the filesystem is not mounted with an
+ * idmapping we know the value of @kgid won't change when calling
+ * from_kgid() so we can simply retrieve the value via __kgid_val()
+ * directly.
+ *
+ * Return: @kgid mapped according to @idmap.
+ * If @kgid has no mapping in either @idmap or @fs_userns INVALID_GID is
+ * returned.
+ */
+vfsgid_t make_vfsgid(struct mnt_idmap *idmap,
+ struct user_namespace *fs_userns, kgid_t kgid)
+{
+ gid_t gid;
+ struct user_namespace *mnt_userns = idmap->owner;
+
+ if (no_idmapping(mnt_userns, fs_userns))
+ return VFSGIDT_INIT(kgid);
+ if (initial_idmapping(fs_userns))
+ gid = __kgid_val(kgid);
+ else
+ gid = from_kgid(fs_userns, kgid);
+ if (gid == (gid_t)-1)
+ return INVALID_VFSGID;
+ return VFSGIDT_INIT(make_kgid(mnt_userns, gid));
+}
+EXPORT_SYMBOL_GPL(make_vfsgid);
+
+/**
+ * from_vfsuid - map a vfsuid into the filesystem idmapping
+ * @idmap: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
+ * @vfsuid : vfsuid to be mapped
+ *
+ * Map @vfsuid into the filesystem idmapping. This function has to be used in
+ * order to e.g. write @vfsuid to inode->i_uid.
+ *
+ * Return: @vfsuid mapped into the filesystem idmapping
+ */
+kuid_t from_vfsuid(struct mnt_idmap *idmap,
+ struct user_namespace *fs_userns, vfsuid_t vfsuid)
+{
+ uid_t uid;
+ struct user_namespace *mnt_userns = idmap->owner;
+
+ if (no_idmapping(mnt_userns, fs_userns))
+ return AS_KUIDT(vfsuid);
+ uid = from_kuid(mnt_userns, AS_KUIDT(vfsuid));
+ if (uid == (uid_t)-1)
+ return INVALID_UID;
+ if (initial_idmapping(fs_userns))
+ return KUIDT_INIT(uid);
+ return make_kuid(fs_userns, uid);
+}
+EXPORT_SYMBOL_GPL(from_vfsuid);
+
+/**
+ * from_vfsgid - map a vfsgid into the filesystem idmapping
+ * @idmap: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
+ * @vfsgid : vfsgid to be mapped
+ *
+ * Map @vfsgid into the filesystem idmapping. This function has to be used in
+ * order to e.g. write @vfsgid to inode->i_gid.
+ *
+ * Return: @vfsgid mapped into the filesystem idmapping
+ */
+kgid_t from_vfsgid(struct mnt_idmap *idmap,
+ struct user_namespace *fs_userns, vfsgid_t vfsgid)
+{
+ gid_t gid;
+ struct user_namespace *mnt_userns = idmap->owner;
+
+ if (no_idmapping(mnt_userns, fs_userns))
+ return AS_KGIDT(vfsgid);
+ gid = from_kgid(mnt_userns, AS_KGIDT(vfsgid));
+ if (gid == (gid_t)-1)
+ return INVALID_GID;
+ if (initial_idmapping(fs_userns))
+ return KGIDT_INIT(gid);
+ return make_kgid(fs_userns, gid);
+}
+EXPORT_SYMBOL_GPL(from_vfsgid);
+
+#ifdef CONFIG_MULTIUSER
+/**
+ * vfsgid_in_group_p() - check whether a vfsuid matches the caller's groups
+ * @vfsgid: the mnt gid to match
+ *
+ * This function can be used to determine whether @vfsuid matches any of the
+ * caller's groups.
+ *
+ * Return: 1 if vfsuid matches caller's groups, 0 if not.
+ */
+int vfsgid_in_group_p(vfsgid_t vfsgid)
+{
+ return in_group_p(AS_KGIDT(vfsgid));
+}
+#else
+int vfsgid_in_group_p(vfsgid_t vfsgid)
+{
+ return 1;
+}
+#endif
+EXPORT_SYMBOL_GPL(vfsgid_in_group_p);
+
+struct mnt_idmap *alloc_mnt_idmap(struct user_namespace *mnt_userns)
+{
+ struct mnt_idmap *idmap;
+
+ idmap = kzalloc(sizeof(struct mnt_idmap), GFP_KERNEL_ACCOUNT);
+ if (!idmap)
+ return ERR_PTR(-ENOMEM);
+
+ idmap->owner = get_user_ns(mnt_userns);
+ refcount_set(&idmap->count, 1);
+ return idmap;
+}
+
+/**
+ * mnt_idmap_get - get a reference to an idmapping
+ * @idmap: the idmap to bump the reference on
+ *
+ * If @idmap is not the @nop_mnt_idmap bump the reference count.
+ *
+ * Return: @idmap with reference count bumped if @not_mnt_idmap isn't passed.
+ */
+struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap)
+{
+ if (idmap != &nop_mnt_idmap)
+ refcount_inc(&idmap->count);
+
+ return idmap;
+}
+
+/**
+ * mnt_idmap_put - put a reference to an idmapping
+ * @idmap: the idmap to put the reference on
+ *
+ * If this is a non-initial idmapping, put the reference count when a mount is
+ * released and free it if we're the last user.
+ */
+void mnt_idmap_put(struct mnt_idmap *idmap)
+{
+ if (idmap != &nop_mnt_idmap && refcount_dec_and_test(&idmap->count)) {
+ put_user_ns(idmap->owner);
+ kfree(idmap);
+ }
+}
diff --git a/fs/mpage.c b/fs/mpage.c
index 0f8ae954a579..ce53179428db 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -532,6 +532,8 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
map_bh.b_size = 1 << blkbits;
if (mpd->get_block(inode, block_in_file, &map_bh, 1))
goto confused;
+ if (!buffer_mapped(&map_bh))
+ goto confused;
if (buffer_new(&map_bh))
clean_bdev_bh_alias(&map_bh);
if (buffer_boundary(&map_bh)) {
diff --git a/fs/namei.c b/fs/namei.c
index 309ae6fc8c99..5855dc6edbd5 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -20,6 +20,7 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/fs.h>
+#include <linux/filelock.h>
#include <linux/namei.h>
#include <linux/pagemap.h>
#include <linux/sched/mm.h>
@@ -273,7 +274,7 @@ void putname(struct filename *name)
/**
* check_acl - perform ACL permission checking
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @inode: inode to check permissions on
* @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...)
*
@@ -281,13 +282,13 @@ void putname(struct filename *name)
* retrieve POSIX acls it needs to know whether it is called from a blocking or
* non-blocking context and thus cares about the MAY_NOT_BLOCK bit.
*
- * If the inode has been found through an idmapped mount the user namespace of
- * the vfsmount must be passed through @mnt_userns. This function will then take
- * care to map the inode according to @mnt_userns before checking permissions.
+ * If the inode has been found through an idmapped mount the idmap of
+ * the vfsmount must be passed through @idmap. This function will then take
+ * care to map the inode according to @idmap before checking permissions.
* On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs init_user_ns.
+ * raw inode simply passs @nop_mnt_idmap.
*/
-static int check_acl(struct user_namespace *mnt_userns,
+static int check_acl(struct mnt_idmap *idmap,
struct inode *inode, int mask)
{
#ifdef CONFIG_FS_POSIX_ACL
@@ -300,14 +301,14 @@ static int check_acl(struct user_namespace *mnt_userns,
/* no ->get_inode_acl() calls in RCU mode... */
if (is_uncached_acl(acl))
return -ECHILD;
- return posix_acl_permission(mnt_userns, inode, acl, mask);
+ return posix_acl_permission(idmap, inode, acl, mask);
}
acl = get_inode_acl(inode, ACL_TYPE_ACCESS);
if (IS_ERR(acl))
return PTR_ERR(acl);
if (acl) {
- int error = posix_acl_permission(mnt_userns, inode, acl, mask);
+ int error = posix_acl_permission(idmap, inode, acl, mask);
posix_acl_release(acl);
return error;
}
@@ -318,7 +319,7 @@ static int check_acl(struct user_namespace *mnt_userns,
/**
* acl_permission_check - perform basic UNIX permission checking
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @inode: inode to check permissions on
* @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...)
*
@@ -326,20 +327,20 @@ static int check_acl(struct user_namespace *mnt_userns,
* function may retrieve POSIX acls it needs to know whether it is called from a
* blocking or non-blocking context and thus cares about the MAY_NOT_BLOCK bit.
*
- * If the inode has been found through an idmapped mount the user namespace of
- * the vfsmount must be passed through @mnt_userns. This function will then take
- * care to map the inode according to @mnt_userns before checking permissions.
+ * If the inode has been found through an idmapped mount the idmap of
+ * the vfsmount must be passed through @idmap. This function will then take
+ * care to map the inode according to @idmap before checking permissions.
* On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs init_user_ns.
+ * raw inode simply passs @nop_mnt_idmap.
*/
-static int acl_permission_check(struct user_namespace *mnt_userns,
+static int acl_permission_check(struct mnt_idmap *idmap,
struct inode *inode, int mask)
{
unsigned int mode = inode->i_mode;
vfsuid_t vfsuid;
/* Are we the owner? If so, ACL's don't matter */
- vfsuid = i_uid_into_vfsuid(mnt_userns, inode);
+ vfsuid = i_uid_into_vfsuid(idmap, inode);
if (likely(vfsuid_eq_kuid(vfsuid, current_fsuid()))) {
mask &= 7;
mode >>= 6;
@@ -348,7 +349,7 @@ static int acl_permission_check(struct user_namespace *mnt_userns,
/* Do we have ACL's? */
if (IS_POSIXACL(inode) && (mode & S_IRWXG)) {
- int error = check_acl(mnt_userns, inode, mask);
+ int error = check_acl(idmap, inode, mask);
if (error != -EAGAIN)
return error;
}
@@ -362,7 +363,7 @@ static int acl_permission_check(struct user_namespace *mnt_userns,
* about? Need to check group ownership if so.
*/
if (mask & (mode ^ (mode >> 3))) {
- vfsgid_t vfsgid = i_gid_into_vfsgid(mnt_userns, inode);
+ vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode);
if (vfsgid_in_group_p(vfsgid))
mode >>= 3;
}
@@ -373,7 +374,7 @@ static int acl_permission_check(struct user_namespace *mnt_userns,
/**
* generic_permission - check for access rights on a Posix-like filesystem
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @inode: inode to check access rights for
* @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC,
* %MAY_NOT_BLOCK ...)
@@ -387,13 +388,13 @@ static int acl_permission_check(struct user_namespace *mnt_userns,
* request cannot be satisfied (eg. requires blocking or too much complexity).
* It would then be called again in ref-walk mode.
*
- * If the inode has been found through an idmapped mount the user namespace of
- * the vfsmount must be passed through @mnt_userns. This function will then take
- * care to map the inode according to @mnt_userns before checking permissions.
+ * If the inode has been found through an idmapped mount the idmap of
+ * the vfsmount must be passed through @idmap. This function will then take
+ * care to map the inode according to @idmap before checking permissions.
* On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs init_user_ns.
+ * raw inode simply passs @nop_mnt_idmap.
*/
-int generic_permission(struct user_namespace *mnt_userns, struct inode *inode,
+int generic_permission(struct mnt_idmap *idmap, struct inode *inode,
int mask)
{
int ret;
@@ -401,17 +402,17 @@ int generic_permission(struct user_namespace *mnt_userns, struct inode *inode,
/*
* Do the basic permission checks.
*/
- ret = acl_permission_check(mnt_userns, inode, mask);
+ ret = acl_permission_check(idmap, inode, mask);
if (ret != -EACCES)
return ret;
if (S_ISDIR(inode->i_mode)) {
/* DACs are overridable for directories */
if (!(mask & MAY_WRITE))
- if (capable_wrt_inode_uidgid(mnt_userns, inode,
+ if (capable_wrt_inode_uidgid(idmap, inode,
CAP_DAC_READ_SEARCH))
return 0;
- if (capable_wrt_inode_uidgid(mnt_userns, inode,
+ if (capable_wrt_inode_uidgid(idmap, inode,
CAP_DAC_OVERRIDE))
return 0;
return -EACCES;
@@ -422,7 +423,7 @@ int generic_permission(struct user_namespace *mnt_userns, struct inode *inode,
*/
mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
if (mask == MAY_READ)
- if (capable_wrt_inode_uidgid(mnt_userns, inode,
+ if (capable_wrt_inode_uidgid(idmap, inode,
CAP_DAC_READ_SEARCH))
return 0;
/*
@@ -431,7 +432,7 @@ int generic_permission(struct user_namespace *mnt_userns, struct inode *inode,
* at least one exec bit set.
*/
if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO))
- if (capable_wrt_inode_uidgid(mnt_userns, inode,
+ if (capable_wrt_inode_uidgid(idmap, inode,
CAP_DAC_OVERRIDE))
return 0;
@@ -441,7 +442,7 @@ EXPORT_SYMBOL(generic_permission);
/**
* do_inode_permission - UNIX permission checking
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @inode: inode to check permissions on
* @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...)
*
@@ -450,19 +451,19 @@ EXPORT_SYMBOL(generic_permission);
* flag in inode->i_opflags, that says "this has not special
* permission function, use the fast case".
*/
-static inline int do_inode_permission(struct user_namespace *mnt_userns,
+static inline int do_inode_permission(struct mnt_idmap *idmap,
struct inode *inode, int mask)
{
if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) {
if (likely(inode->i_op->permission))
- return inode->i_op->permission(mnt_userns, inode, mask);
+ return inode->i_op->permission(idmap, inode, mask);
/* This gets set once for the inode lifetime */
spin_lock(&inode->i_lock);
inode->i_opflags |= IOP_FASTPERM;
spin_unlock(&inode->i_lock);
}
- return generic_permission(mnt_userns, inode, mask);
+ return generic_permission(idmap, inode, mask);
}
/**
@@ -487,7 +488,7 @@ static int sb_permission(struct super_block *sb, struct inode *inode, int mask)
/**
* inode_permission - Check for access rights to a given inode
- * @mnt_userns: User namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @inode: Inode to check permission on
* @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
*
@@ -497,7 +498,7 @@ static int sb_permission(struct super_block *sb, struct inode *inode, int mask)
*
* When checking for MAY_APPEND, MAY_WRITE must also be set in @mask.
*/
-int inode_permission(struct user_namespace *mnt_userns,
+int inode_permission(struct mnt_idmap *idmap,
struct inode *inode, int mask)
{
int retval;
@@ -518,11 +519,11 @@ int inode_permission(struct user_namespace *mnt_userns,
* written back improperly if their true value is unknown
* to the vfs.
*/
- if (HAS_UNMAPPED_ID(mnt_userns, inode))
+ if (HAS_UNMAPPED_ID(idmap, inode))
return -EACCES;
}
- retval = do_inode_permission(mnt_userns, inode, mask);
+ retval = do_inode_permission(idmap, inode, mask);
if (retval)
return retval;
@@ -1094,14 +1095,14 @@ fs_initcall(init_fs_namei_sysctls);
*/
static inline int may_follow_link(struct nameidata *nd, const struct inode *inode)
{
- struct user_namespace *mnt_userns;
+ struct mnt_idmap *idmap;
vfsuid_t vfsuid;
if (!sysctl_protected_symlinks)
return 0;
- mnt_userns = mnt_user_ns(nd->path.mnt);
- vfsuid = i_uid_into_vfsuid(mnt_userns, inode);
+ idmap = mnt_idmap(nd->path.mnt);
+ vfsuid = i_uid_into_vfsuid(idmap, inode);
/* Allowed if owner and follower match. */
if (vfsuid_eq_kuid(vfsuid, current_fsuid()))
return 0;
@@ -1124,7 +1125,7 @@ static inline int may_follow_link(struct nameidata *nd, const struct inode *inod
/**
* safe_hardlink_source - Check for safe hardlink conditions
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @inode: the source inode to hardlink from
*
* Return false if at least one of the following conditions:
@@ -1135,7 +1136,7 @@ static inline int may_follow_link(struct nameidata *nd, const struct inode *inod
*
* Otherwise returns true.
*/
-static bool safe_hardlink_source(struct user_namespace *mnt_userns,
+static bool safe_hardlink_source(struct mnt_idmap *idmap,
struct inode *inode)
{
umode_t mode = inode->i_mode;
@@ -1153,7 +1154,7 @@ static bool safe_hardlink_source(struct user_namespace *mnt_userns,
return false;
/* Hardlinking to unreadable or unwritable sources is dangerous. */
- if (inode_permission(mnt_userns, inode, MAY_READ | MAY_WRITE))
+ if (inode_permission(idmap, inode, MAY_READ | MAY_WRITE))
return false;
return true;
@@ -1161,8 +1162,8 @@ static bool safe_hardlink_source(struct user_namespace *mnt_userns,
/**
* may_linkat - Check permissions for creating a hardlink
- * @mnt_userns: user namespace of the mount the inode was found from
- * @link: the source to hardlink from
+ * @idmap: idmap of the mount the inode was found from
+ * @link: the source to hardlink from
*
* Block hardlink when all of:
* - sysctl_protected_hardlinks enabled
@@ -1170,21 +1171,21 @@ static bool safe_hardlink_source(struct user_namespace *mnt_userns,
* - hardlink source is unsafe (see safe_hardlink_source() above)
* - not CAP_FOWNER in a namespace with the inode owner uid mapped
*
- * If the inode has been found through an idmapped mount the user namespace of
- * the vfsmount must be passed through @mnt_userns. This function will then take
- * care to map the inode according to @mnt_userns before checking permissions.
+ * If the inode has been found through an idmapped mount the idmap of
+ * the vfsmount must be passed through @idmap. This function will then take
+ * care to map the inode according to @idmap before checking permissions.
* On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs init_user_ns.
+ * raw inode simply pass @nop_mnt_idmap.
*
* Returns 0 if successful, -ve on error.
*/
-int may_linkat(struct user_namespace *mnt_userns, const struct path *link)
+int may_linkat(struct mnt_idmap *idmap, const struct path *link)
{
struct inode *inode = link->dentry->d_inode;
/* Inode writeback is not safe when the uid or gid are invalid. */
- if (!vfsuid_valid(i_uid_into_vfsuid(mnt_userns, inode)) ||
- !vfsgid_valid(i_gid_into_vfsgid(mnt_userns, inode)))
+ if (!vfsuid_valid(i_uid_into_vfsuid(idmap, inode)) ||
+ !vfsgid_valid(i_gid_into_vfsgid(idmap, inode)))
return -EOVERFLOW;
if (!sysctl_protected_hardlinks)
@@ -1193,8 +1194,8 @@ int may_linkat(struct user_namespace *mnt_userns, const struct path *link)
/* Source inode owner (or CAP_FOWNER) can hardlink all they like,
* otherwise, it must be a safe source.
*/
- if (safe_hardlink_source(mnt_userns, inode) ||
- inode_owner_or_capable(mnt_userns, inode))
+ if (safe_hardlink_source(idmap, inode) ||
+ inode_owner_or_capable(idmap, inode))
return 0;
audit_log_path_denied(AUDIT_ANOM_LINK, "linkat");
@@ -1205,7 +1206,7 @@ int may_linkat(struct user_namespace *mnt_userns, const struct path *link)
* may_create_in_sticky - Check whether an O_CREAT open in a sticky directory
* should be allowed, or not, on files that already
* exist.
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @nd: nameidata pathwalk data
* @inode: the inode of the file to open
*
@@ -1220,15 +1221,15 @@ int may_linkat(struct user_namespace *mnt_userns, const struct path *link)
* the directory doesn't have to be world writable: being group writable will
* be enough.
*
- * If the inode has been found through an idmapped mount the user namespace of
- * the vfsmount must be passed through @mnt_userns. This function will then take
- * care to map the inode according to @mnt_userns before checking permissions.
+ * If the inode has been found through an idmapped mount the idmap of
+ * the vfsmount must be passed through @idmap. This function will then take
+ * care to map the inode according to @idmap before checking permissions.
* On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs init_user_ns.
+ * raw inode simply pass @nop_mnt_idmap.
*
* Returns 0 if the open is allowed, -ve on error.
*/
-static int may_create_in_sticky(struct user_namespace *mnt_userns,
+static int may_create_in_sticky(struct mnt_idmap *idmap,
struct nameidata *nd, struct inode *const inode)
{
umode_t dir_mode = nd->dir_mode;
@@ -1237,8 +1238,8 @@ static int may_create_in_sticky(struct user_namespace *mnt_userns,
if ((!sysctl_protected_fifos && S_ISFIFO(inode->i_mode)) ||
(!sysctl_protected_regular && S_ISREG(inode->i_mode)) ||
likely(!(dir_mode & S_ISVTX)) ||
- vfsuid_eq(i_uid_into_vfsuid(mnt_userns, inode), dir_vfsuid) ||
- vfsuid_eq_kuid(i_uid_into_vfsuid(mnt_userns, inode), current_fsuid()))
+ vfsuid_eq(i_uid_into_vfsuid(idmap, inode), dir_vfsuid) ||
+ vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), current_fsuid()))
return 0;
if (likely(dir_mode & 0002) ||
@@ -1704,15 +1705,15 @@ static struct dentry *lookup_slow(const struct qstr *name,
return res;
}
-static inline int may_lookup(struct user_namespace *mnt_userns,
+static inline int may_lookup(struct mnt_idmap *idmap,
struct nameidata *nd)
{
if (nd->flags & LOOKUP_RCU) {
- int err = inode_permission(mnt_userns, nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
+ int err = inode_permission(idmap, nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
if (err != -ECHILD || !try_to_unlazy(nd))
return err;
}
- return inode_permission(mnt_userns, nd->inode, MAY_EXEC);
+ return inode_permission(idmap, nd->inode, MAY_EXEC);
}
static int reserve_stack(struct nameidata *nd, struct path *link)
@@ -2253,13 +2254,13 @@ static int link_path_walk(const char *name, struct nameidata *nd)
/* At this point we know we have a real path component. */
for(;;) {
- struct user_namespace *mnt_userns;
+ struct mnt_idmap *idmap;
const char *link;
u64 hash_len;
int type;
- mnt_userns = mnt_user_ns(nd->path.mnt);
- err = may_lookup(mnt_userns, nd);
+ idmap = mnt_idmap(nd->path.mnt);
+ err = may_lookup(idmap, nd);
if (err)
return err;
@@ -2307,7 +2308,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
OK:
/* pathname or trailing symlink, done */
if (!depth) {
- nd->dir_vfsuid = i_uid_into_vfsuid(mnt_userns, nd->inode);
+ nd->dir_vfsuid = i_uid_into_vfsuid(idmap, nd->inode);
nd->dir_mode = nd->inode->i_mode;
nd->flags &= ~LOOKUP_PARENT;
return 0;
@@ -2622,7 +2623,7 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
}
EXPORT_SYMBOL(vfs_path_lookup);
-static int lookup_one_common(struct user_namespace *mnt_userns,
+static int lookup_one_common(struct mnt_idmap *idmap,
const char *name, struct dentry *base, int len,
struct qstr *this)
{
@@ -2652,7 +2653,7 @@ static int lookup_one_common(struct user_namespace *mnt_userns,
return err;
}
- return inode_permission(mnt_userns, base->d_inode, MAY_EXEC);
+ return inode_permission(idmap, base->d_inode, MAY_EXEC);
}
/**
@@ -2676,7 +2677,7 @@ struct dentry *try_lookup_one_len(const char *name, struct dentry *base, int len
WARN_ON_ONCE(!inode_is_locked(base->d_inode));
- err = lookup_one_common(&init_user_ns, name, base, len, &this);
+ err = lookup_one_common(&nop_mnt_idmap, name, base, len, &this);
if (err)
return ERR_PTR(err);
@@ -2703,7 +2704,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
WARN_ON_ONCE(!inode_is_locked(base->d_inode));
- err = lookup_one_common(&init_user_ns, name, base, len, &this);
+ err = lookup_one_common(&nop_mnt_idmap, name, base, len, &this);
if (err)
return ERR_PTR(err);
@@ -2714,7 +2715,7 @@ EXPORT_SYMBOL(lookup_one_len);
/**
* lookup_one - filesystem helper to lookup single pathname component
- * @mnt_userns: user namespace of the mount the lookup is performed from
+ * @idmap: idmap of the mount the lookup is performed from
* @name: pathname component to lookup
* @base: base directory to lookup from
* @len: maximum length @len should be interpreted to
@@ -2724,7 +2725,7 @@ EXPORT_SYMBOL(lookup_one_len);
*
* The caller must hold base->i_mutex.
*/
-struct dentry *lookup_one(struct user_namespace *mnt_userns, const char *name,
+struct dentry *lookup_one(struct mnt_idmap *idmap, const char *name,
struct dentry *base, int len)
{
struct dentry *dentry;
@@ -2733,7 +2734,7 @@ struct dentry *lookup_one(struct user_namespace *mnt_userns, const char *name,
WARN_ON_ONCE(!inode_is_locked(base->d_inode));
- err = lookup_one_common(mnt_userns, name, base, len, &this);
+ err = lookup_one_common(idmap, name, base, len, &this);
if (err)
return ERR_PTR(err);
@@ -2744,7 +2745,7 @@ EXPORT_SYMBOL(lookup_one);
/**
* lookup_one_unlocked - filesystem helper to lookup single pathname component
- * @mnt_userns: idmapping of the mount the lookup is performed from
+ * @idmap: idmap of the mount the lookup is performed from
* @name: pathname component to lookup
* @base: base directory to lookup from
* @len: maximum length @len should be interpreted to
@@ -2755,7 +2756,7 @@ EXPORT_SYMBOL(lookup_one);
* Unlike lookup_one_len, it should be called without the parent
* i_mutex held, and will take the i_mutex itself if necessary.
*/
-struct dentry *lookup_one_unlocked(struct user_namespace *mnt_userns,
+struct dentry *lookup_one_unlocked(struct mnt_idmap *idmap,
const char *name, struct dentry *base,
int len)
{
@@ -2763,7 +2764,7 @@ struct dentry *lookup_one_unlocked(struct user_namespace *mnt_userns,
int err;
struct dentry *ret;
- err = lookup_one_common(mnt_userns, name, base, len, &this);
+ err = lookup_one_common(idmap, name, base, len, &this);
if (err)
return ERR_PTR(err);
@@ -2777,7 +2778,7 @@ EXPORT_SYMBOL(lookup_one_unlocked);
/**
* lookup_one_positive_unlocked - filesystem helper to lookup single
* pathname component
- * @mnt_userns: idmapping of the mount the lookup is performed from
+ * @idmap: idmap of the mount the lookup is performed from
* @name: pathname component to lookup
* @base: base directory to lookup from
* @len: maximum length @len should be interpreted to
@@ -2794,11 +2795,11 @@ EXPORT_SYMBOL(lookup_one_unlocked);
*
* The helper should be called without i_mutex held.
*/
-struct dentry *lookup_one_positive_unlocked(struct user_namespace *mnt_userns,
+struct dentry *lookup_one_positive_unlocked(struct mnt_idmap *idmap,
const char *name,
struct dentry *base, int len)
{
- struct dentry *ret = lookup_one_unlocked(mnt_userns, name, base, len);
+ struct dentry *ret = lookup_one_unlocked(idmap, name, base, len);
if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
dput(ret);
@@ -2823,7 +2824,7 @@ EXPORT_SYMBOL(lookup_one_positive_unlocked);
struct dentry *lookup_one_len_unlocked(const char *name,
struct dentry *base, int len)
{
- return lookup_one_unlocked(&init_user_ns, name, base, len);
+ return lookup_one_unlocked(&nop_mnt_idmap, name, base, len);
}
EXPORT_SYMBOL(lookup_one_len_unlocked);
@@ -2838,7 +2839,7 @@ EXPORT_SYMBOL(lookup_one_len_unlocked);
struct dentry *lookup_positive_unlocked(const char *name,
struct dentry *base, int len)
{
- return lookup_one_positive_unlocked(&init_user_ns, name, base, len);
+ return lookup_one_positive_unlocked(&nop_mnt_idmap, name, base, len);
}
EXPORT_SYMBOL(lookup_positive_unlocked);
@@ -2880,16 +2881,16 @@ int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
}
EXPORT_SYMBOL(user_path_at_empty);
-int __check_sticky(struct user_namespace *mnt_userns, struct inode *dir,
+int __check_sticky(struct mnt_idmap *idmap, struct inode *dir,
struct inode *inode)
{
kuid_t fsuid = current_fsuid();
- if (vfsuid_eq_kuid(i_uid_into_vfsuid(mnt_userns, inode), fsuid))
+ if (vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), fsuid))
return 0;
- if (vfsuid_eq_kuid(i_uid_into_vfsuid(mnt_userns, dir), fsuid))
+ if (vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, dir), fsuid))
return 0;
- return !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FOWNER);
+ return !capable_wrt_inode_uidgid(idmap, inode, CAP_FOWNER);
}
EXPORT_SYMBOL(__check_sticky);
@@ -2913,7 +2914,7 @@ EXPORT_SYMBOL(__check_sticky);
* 11. We don't allow removal of NFS sillyrenamed files; it's handled by
* nfs_async_unlink().
*/
-static int may_delete(struct user_namespace *mnt_userns, struct inode *dir,
+static int may_delete(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *victim, bool isdir)
{
struct inode *inode = d_backing_inode(victim);
@@ -2926,21 +2927,21 @@ static int may_delete(struct user_namespace *mnt_userns, struct inode *dir,
BUG_ON(victim->d_parent->d_inode != dir);
/* Inode writeback is not safe when the uid or gid are invalid. */
- if (!vfsuid_valid(i_uid_into_vfsuid(mnt_userns, inode)) ||
- !vfsgid_valid(i_gid_into_vfsgid(mnt_userns, inode)))
+ if (!vfsuid_valid(i_uid_into_vfsuid(idmap, inode)) ||
+ !vfsgid_valid(i_gid_into_vfsgid(idmap, inode)))
return -EOVERFLOW;
audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
- error = inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC);
+ error = inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC);
if (error)
return error;
if (IS_APPEND(dir))
return -EPERM;
- if (check_sticky(mnt_userns, dir, inode) || IS_APPEND(inode) ||
+ if (check_sticky(idmap, dir, inode) || IS_APPEND(inode) ||
IS_IMMUTABLE(inode) || IS_SWAPFILE(inode) ||
- HAS_UNMAPPED_ID(mnt_userns, inode))
+ HAS_UNMAPPED_ID(idmap, inode))
return -EPERM;
if (isdir) {
if (!d_is_dir(victim))
@@ -2965,7 +2966,7 @@ static int may_delete(struct user_namespace *mnt_userns, struct inode *dir,
* 4. We should have write and exec permissions on dir
* 5. We can't do it if dir is immutable (done in permission())
*/
-static inline int may_create(struct user_namespace *mnt_userns,
+static inline int may_create(struct mnt_idmap *idmap,
struct inode *dir, struct dentry *child)
{
audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE);
@@ -2973,10 +2974,10 @@ static inline int may_create(struct user_namespace *mnt_userns,
return -EEXIST;
if (IS_DEADDIR(dir))
return -ENOENT;
- if (!fsuidgid_has_mapping(dir->i_sb, mnt_userns))
+ if (!fsuidgid_has_mapping(dir->i_sb, idmap))
return -EOVERFLOW;
- return inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC);
+ return inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC);
}
/*
@@ -3044,7 +3045,7 @@ static inline umode_t mode_strip_umask(const struct inode *dir, umode_t mode)
/**
* vfs_prepare_mode - prepare the mode to be used for a new inode
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @dir: parent directory of the new inode
* @mode: mode of the new inode
* @mask_perms: allowed permission by the vfs
@@ -3065,11 +3066,11 @@ static inline umode_t mode_strip_umask(const struct inode *dir, umode_t mode)
*
* Returns: mode to be passed to the filesystem
*/
-static inline umode_t vfs_prepare_mode(struct user_namespace *mnt_userns,
+static inline umode_t vfs_prepare_mode(struct mnt_idmap *idmap,
const struct inode *dir, umode_t mode,
umode_t mask_perms, umode_t type)
{
- mode = mode_strip_sgid(mnt_userns, dir, mode);
+ mode = mode_strip_sgid(idmap, dir, mode);
mode = mode_strip_umask(dir, mode);
/*
@@ -3084,7 +3085,7 @@ static inline umode_t vfs_prepare_mode(struct user_namespace *mnt_userns,
/**
* vfs_create - create new file
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @dir: inode of @dentry
* @dentry: pointer to dentry of the base directory
* @mode: mode of the new file
@@ -3092,27 +3093,29 @@ static inline umode_t vfs_prepare_mode(struct user_namespace *mnt_userns,
*
* Create a new file.
*
- * If the inode has been found through an idmapped mount the user namespace of
- * the vfsmount must be passed through @mnt_userns. This function will then take
- * care to map the inode according to @mnt_userns before checking permissions.
+ * If the inode has been found through an idmapped mount the idmap of
+ * the vfsmount must be passed through @idmap. This function will then take
+ * care to map the inode according to @idmap before checking permissions.
* On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs init_user_ns.
+ * raw inode simply passs @nop_mnt_idmap.
*/
-int vfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+int vfs_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool want_excl)
{
- int error = may_create(mnt_userns, dir, dentry);
+ int error;
+
+ error = may_create(idmap, dir, dentry);
if (error)
return error;
if (!dir->i_op->create)
return -EACCES; /* shouldn't it be ENOSYS? */
- mode = vfs_prepare_mode(mnt_userns, dir, mode, S_IALLUGO, S_IFREG);
+ mode = vfs_prepare_mode(idmap, dir, mode, S_IALLUGO, S_IFREG);
error = security_inode_create(dir, dentry, mode);
if (error)
return error;
- error = dir->i_op->create(mnt_userns, dir, dentry, mode, want_excl);
+ error = dir->i_op->create(idmap, dir, dentry, mode, want_excl);
if (!error)
fsnotify_create(dir, dentry);
return error;
@@ -3124,7 +3127,7 @@ int vfs_mkobj(struct dentry *dentry, umode_t mode,
void *arg)
{
struct inode *dir = dentry->d_parent->d_inode;
- int error = may_create(&init_user_ns, dir, dentry);
+ int error = may_create(&nop_mnt_idmap, dir, dentry);
if (error)
return error;
@@ -3146,7 +3149,7 @@ bool may_open_dev(const struct path *path)
!(path->mnt->mnt_sb->s_iflags & SB_I_NODEV);
}
-static int may_open(struct user_namespace *mnt_userns, const struct path *path,
+static int may_open(struct mnt_idmap *idmap, const struct path *path,
int acc_mode, int flag)
{
struct dentry *dentry = path->dentry;
@@ -3182,7 +3185,7 @@ static int may_open(struct user_namespace *mnt_userns, const struct path *path,
break;
}
- error = inode_permission(mnt_userns, inode, MAY_OPEN | acc_mode);
+ error = inode_permission(idmap, inode, MAY_OPEN | acc_mode);
if (error)
return error;
@@ -3197,13 +3200,13 @@ static int may_open(struct user_namespace *mnt_userns, const struct path *path,
}
/* O_NOATIME can only be set by the owner or superuser */
- if (flag & O_NOATIME && !inode_owner_or_capable(mnt_userns, inode))
+ if (flag & O_NOATIME && !inode_owner_or_capable(idmap, inode))
return -EPERM;
return 0;
}
-static int handle_truncate(struct user_namespace *mnt_userns, struct file *filp)
+static int handle_truncate(struct mnt_idmap *idmap, struct file *filp)
{
const struct path *path = &filp->f_path;
struct inode *inode = path->dentry->d_inode;
@@ -3213,7 +3216,7 @@ static int handle_truncate(struct user_namespace *mnt_userns, struct file *filp)
error = security_file_truncate(filp);
if (!error) {
- error = do_truncate(mnt_userns, path->dentry, 0,
+ error = do_truncate(idmap, path->dentry, 0,
ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
filp);
}
@@ -3228,7 +3231,7 @@ static inline int open_to_namei_flags(int flag)
return flag;
}
-static int may_o_create(struct user_namespace *mnt_userns,
+static int may_o_create(struct mnt_idmap *idmap,
const struct path *dir, struct dentry *dentry,
umode_t mode)
{
@@ -3236,10 +3239,10 @@ static int may_o_create(struct user_namespace *mnt_userns,
if (error)
return error;
- if (!fsuidgid_has_mapping(dir->dentry->d_sb, mnt_userns))
+ if (!fsuidgid_has_mapping(dir->dentry->d_sb, idmap))
return -EOVERFLOW;
- error = inode_permission(mnt_userns, dir->dentry->d_inode,
+ error = inode_permission(idmap, dir->dentry->d_inode,
MAY_WRITE | MAY_EXEC);
if (error)
return error;
@@ -3319,7 +3322,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
const struct open_flags *op,
bool got_write)
{
- struct user_namespace *mnt_userns;
+ struct mnt_idmap *idmap;
struct dentry *dir = nd->path.dentry;
struct inode *dir_inode = dir->d_inode;
int open_flag = op->open_flag;
@@ -3367,13 +3370,13 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
*/
if (unlikely(!got_write))
open_flag &= ~O_TRUNC;
- mnt_userns = mnt_user_ns(nd->path.mnt);
+ idmap = mnt_idmap(nd->path.mnt);
if (open_flag & O_CREAT) {
if (open_flag & O_EXCL)
open_flag &= ~O_TRUNC;
- mode = vfs_prepare_mode(mnt_userns, dir->d_inode, mode, mode, mode);
+ mode = vfs_prepare_mode(idmap, dir->d_inode, mode, mode, mode);
if (likely(got_write))
- create_error = may_o_create(mnt_userns, &nd->path,
+ create_error = may_o_create(idmap, &nd->path,
dentry, mode);
else
create_error = -EROFS;
@@ -3410,7 +3413,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
goto out_dput;
}
- error = dir_inode->i_op->create(mnt_userns, dir_inode, dentry,
+ error = dir_inode->i_op->create(idmap, dir_inode, dentry,
mode, open_flag & O_EXCL);
if (error)
goto out_dput;
@@ -3513,7 +3516,7 @@ finish_lookup:
static int do_open(struct nameidata *nd,
struct file *file, const struct open_flags *op)
{
- struct user_namespace *mnt_userns;
+ struct mnt_idmap *idmap;
int open_flag = op->open_flag;
bool do_truncate;
int acc_mode;
@@ -3526,13 +3529,13 @@ static int do_open(struct nameidata *nd,
}
if (!(file->f_mode & FMODE_CREATED))
audit_inode(nd->name, nd->path.dentry, 0);
- mnt_userns = mnt_user_ns(nd->path.mnt);
+ idmap = mnt_idmap(nd->path.mnt);
if (open_flag & O_CREAT) {
if ((open_flag & O_EXCL) && !(file->f_mode & FMODE_CREATED))
return -EEXIST;
if (d_is_dir(nd->path.dentry))
return -EISDIR;
- error = may_create_in_sticky(mnt_userns, nd,
+ error = may_create_in_sticky(idmap, nd,
d_backing_inode(nd->path.dentry));
if (unlikely(error))
return error;
@@ -3552,13 +3555,13 @@ static int do_open(struct nameidata *nd,
return error;
do_truncate = true;
}
- error = may_open(mnt_userns, &nd->path, acc_mode, open_flag);
+ error = may_open(idmap, &nd->path, acc_mode, open_flag);
if (!error && !(file->f_mode & FMODE_OPENED))
error = vfs_open(&nd->path, file);
if (!error)
error = ima_file_check(file, op->acc_mode);
if (!error && do_truncate)
- error = handle_truncate(mnt_userns, file);
+ error = handle_truncate(idmap, file);
if (unlikely(error > 0)) {
WARN_ON(1);
error = -EINVAL;
@@ -3570,20 +3573,20 @@ static int do_open(struct nameidata *nd,
/**
* vfs_tmpfile - create tmpfile
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @dentry: pointer to dentry of the base directory
* @mode: mode of the new tmpfile
* @open_flag: flags
*
* Create a temporary file.
*
- * If the inode has been found through an idmapped mount the user namespace of
- * the vfsmount must be passed through @mnt_userns. This function will then take
- * care to map the inode according to @mnt_userns before checking permissions.
+ * If the inode has been found through an idmapped mount the idmap of
+ * the vfsmount must be passed through @idmap. This function will then take
+ * care to map the inode according to @idmap before checking permissions.
* On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs init_user_ns.
+ * raw inode simply passs @nop_mnt_idmap.
*/
-static int vfs_tmpfile(struct user_namespace *mnt_userns,
+static int vfs_tmpfile(struct mnt_idmap *idmap,
const struct path *parentpath,
struct file *file, umode_t mode)
{
@@ -3594,7 +3597,7 @@ static int vfs_tmpfile(struct user_namespace *mnt_userns,
int open_flag = file->f_flags;
/* we want directory to be writable */
- error = inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC);
+ error = inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC);
if (error)
return error;
if (!dir->i_op->tmpfile)
@@ -3604,13 +3607,13 @@ static int vfs_tmpfile(struct user_namespace *mnt_userns,
return -ENOMEM;
file->f_path.mnt = parentpath->mnt;
file->f_path.dentry = child;
- mode = vfs_prepare_mode(mnt_userns, dir, mode, mode, mode);
- error = dir->i_op->tmpfile(mnt_userns, dir, file, mode);
+ mode = vfs_prepare_mode(idmap, dir, mode, mode, mode);
+ error = dir->i_op->tmpfile(idmap, dir, file, mode);
dput(child);
if (error)
return error;
/* Don't check for other permissions, the inode was just created */
- error = may_open(mnt_userns, &file->f_path, 0, file->f_flags);
+ error = may_open(idmap, &file->f_path, 0, file->f_flags);
if (error)
return error;
inode = file_inode(file);
@@ -3619,13 +3622,13 @@ static int vfs_tmpfile(struct user_namespace *mnt_userns,
inode->i_state |= I_LINKABLE;
spin_unlock(&inode->i_lock);
}
- ima_post_create_tmpfile(mnt_userns, inode);
+ ima_post_create_tmpfile(idmap, inode);
return 0;
}
/**
* vfs_tmpfile_open - open a tmpfile for kernel internal use
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @parentpath: path of the base directory
* @mode: mode of the new tmpfile
* @open_flag: flags
@@ -3635,7 +3638,7 @@ static int vfs_tmpfile(struct user_namespace *mnt_userns,
* hence this is only for kernel internal use, and must not be installed into
* file tables or such.
*/
-struct file *vfs_tmpfile_open(struct user_namespace *mnt_userns,
+struct file *vfs_tmpfile_open(struct mnt_idmap *idmap,
const struct path *parentpath,
umode_t mode, int open_flag, const struct cred *cred)
{
@@ -3644,7 +3647,7 @@ struct file *vfs_tmpfile_open(struct user_namespace *mnt_userns,
file = alloc_empty_file_noaccount(open_flag, cred);
if (!IS_ERR(file)) {
- error = vfs_tmpfile(mnt_userns, parentpath, file, mode);
+ error = vfs_tmpfile(idmap, parentpath, file, mode);
if (error) {
fput(file);
file = ERR_PTR(error);
@@ -3658,7 +3661,6 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags,
const struct open_flags *op,
struct file *file)
{
- struct user_namespace *mnt_userns;
struct path path;
int error = path_lookupat(nd, flags | LOOKUP_DIRECTORY, &path);
@@ -3667,8 +3669,7 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags,
error = mnt_want_write(path.mnt);
if (unlikely(error))
goto out;
- mnt_userns = mnt_user_ns(path.mnt);
- error = vfs_tmpfile(mnt_userns, &path, file, op->mode);
+ error = vfs_tmpfile(mnt_idmap(path.mnt), &path, file, op->mode);
if (error)
goto out2;
audit_inode(nd->name, file->f_path.dentry, 0);
@@ -3873,7 +3874,7 @@ EXPORT_SYMBOL(user_path_create);
/**
* vfs_mknod - create device node or file
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @dir: inode of @dentry
* @dentry: pointer to dentry of the base directory
* @mode: mode of the new device node or file
@@ -3881,17 +3882,17 @@ EXPORT_SYMBOL(user_path_create);
*
* Create a device node or file.
*
- * If the inode has been found through an idmapped mount the user namespace of
- * the vfsmount must be passed through @mnt_userns. This function will then take
- * care to map the inode according to @mnt_userns before checking permissions.
+ * If the inode has been found through an idmapped mount the idmap of
+ * the vfsmount must be passed through @idmap. This function will then take
+ * care to map the inode according to @idmap before checking permissions.
* On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs init_user_ns.
+ * raw inode simply passs @nop_mnt_idmap.
*/
-int vfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+int vfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t dev)
{
bool is_whiteout = S_ISCHR(mode) && dev == WHITEOUT_DEV;
- int error = may_create(mnt_userns, dir, dentry);
+ int error = may_create(idmap, dir, dentry);
if (error)
return error;
@@ -3903,7 +3904,7 @@ int vfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
if (!dir->i_op->mknod)
return -EPERM;
- mode = vfs_prepare_mode(mnt_userns, dir, mode, mode, mode);
+ mode = vfs_prepare_mode(idmap, dir, mode, mode, mode);
error = devcgroup_inode_mknod(mode, dev);
if (error)
return error;
@@ -3912,7 +3913,7 @@ int vfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
if (error)
return error;
- error = dir->i_op->mknod(mnt_userns, dir, dentry, mode, dev);
+ error = dir->i_op->mknod(idmap, dir, dentry, mode, dev);
if (!error)
fsnotify_create(dir, dentry);
return error;
@@ -3939,7 +3940,7 @@ static int may_mknod(umode_t mode)
static int do_mknodat(int dfd, struct filename *name, umode_t mode,
unsigned int dev)
{
- struct user_namespace *mnt_userns;
+ struct mnt_idmap *idmap;
struct dentry *dentry;
struct path path;
int error;
@@ -3959,20 +3960,20 @@ retry:
if (error)
goto out2;
- mnt_userns = mnt_user_ns(path.mnt);
+ idmap = mnt_idmap(path.mnt);
switch (mode & S_IFMT) {
case 0: case S_IFREG:
- error = vfs_create(mnt_userns, path.dentry->d_inode,
+ error = vfs_create(idmap, path.dentry->d_inode,
dentry, mode, true);
if (!error)
- ima_post_path_mknod(mnt_userns, dentry);
+ ima_post_path_mknod(idmap, dentry);
break;
case S_IFCHR: case S_IFBLK:
- error = vfs_mknod(mnt_userns, path.dentry->d_inode,
+ error = vfs_mknod(idmap, path.dentry->d_inode,
dentry, mode, new_decode_dev(dev));
break;
case S_IFIFO: case S_IFSOCK:
- error = vfs_mknod(mnt_userns, path.dentry->d_inode,
+ error = vfs_mknod(idmap, path.dentry->d_inode,
dentry, mode, 0);
break;
}
@@ -4000,32 +4001,33 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d
/**
* vfs_mkdir - create directory
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @dir: inode of @dentry
* @dentry: pointer to dentry of the base directory
* @mode: mode of the new directory
*
* Create a directory.
*
- * If the inode has been found through an idmapped mount the user namespace of
- * the vfsmount must be passed through @mnt_userns. This function will then take
- * care to map the inode according to @mnt_userns before checking permissions.
+ * If the inode has been found through an idmapped mount the idmap of
+ * the vfsmount must be passed through @idmap. This function will then take
+ * care to map the inode according to @idmap before checking permissions.
* On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs init_user_ns.
+ * raw inode simply passs @nop_mnt_idmap.
*/
-int vfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
- int error = may_create(mnt_userns, dir, dentry);
+ int error;
unsigned max_links = dir->i_sb->s_max_links;
+ error = may_create(idmap, dir, dentry);
if (error)
return error;
if (!dir->i_op->mkdir)
return -EPERM;
- mode = vfs_prepare_mode(mnt_userns, dir, mode, S_IRWXUGO | S_ISVTX, 0);
+ mode = vfs_prepare_mode(idmap, dir, mode, S_IRWXUGO | S_ISVTX, 0);
error = security_inode_mkdir(dir, dentry, mode);
if (error)
return error;
@@ -4033,7 +4035,7 @@ int vfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
if (max_links && dir->i_nlink >= max_links)
return -EMLINK;
- error = dir->i_op->mkdir(mnt_userns, dir, dentry, mode);
+ error = dir->i_op->mkdir(idmap, dir, dentry, mode);
if (!error)
fsnotify_mkdir(dir, dentry);
return error;
@@ -4056,10 +4058,8 @@ retry:
error = security_path_mkdir(&path, dentry,
mode_strip_umask(path.dentry->d_inode, mode));
if (!error) {
- struct user_namespace *mnt_userns;
- mnt_userns = mnt_user_ns(path.mnt);
- error = vfs_mkdir(mnt_userns, path.dentry->d_inode, dentry,
- mode);
+ error = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode,
+ dentry, mode);
}
done_path_create(&path, dentry);
if (retry_estale(error, lookup_flags)) {
@@ -4083,22 +4083,22 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
/**
* vfs_rmdir - remove directory
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @dir: inode of @dentry
* @dentry: pointer to dentry of the base directory
*
* Remove a directory.
*
- * If the inode has been found through an idmapped mount the user namespace of
- * the vfsmount must be passed through @mnt_userns. This function will then take
- * care to map the inode according to @mnt_userns before checking permissions.
+ * If the inode has been found through an idmapped mount the idmap of
+ * the vfsmount must be passed through @idmap. This function will then take
+ * care to map the inode according to @idmap before checking permissions.
* On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs init_user_ns.
+ * raw inode simply passs @nop_mnt_idmap.
*/
-int vfs_rmdir(struct user_namespace *mnt_userns, struct inode *dir,
+int vfs_rmdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry)
{
- int error = may_delete(mnt_userns, dir, dentry, 1);
+ int error = may_delete(idmap, dir, dentry, 1);
if (error)
return error;
@@ -4138,7 +4138,6 @@ EXPORT_SYMBOL(vfs_rmdir);
int do_rmdir(int dfd, struct filename *name)
{
- struct user_namespace *mnt_userns;
int error;
struct dentry *dentry;
struct path path;
@@ -4178,8 +4177,7 @@ retry:
error = security_path_rmdir(&path, dentry);
if (error)
goto exit4;
- mnt_userns = mnt_user_ns(path.mnt);
- error = vfs_rmdir(mnt_userns, path.dentry->d_inode, dentry);
+ error = vfs_rmdir(mnt_idmap(path.mnt), path.dentry->d_inode, dentry);
exit4:
dput(dentry);
exit3:
@@ -4203,7 +4201,7 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
/**
* vfs_unlink - unlink a filesystem object
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @dir: parent directory
* @dentry: victim
* @delegated_inode: returns victim inode, if the inode is delegated.
@@ -4220,17 +4218,17 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
* be appropriate for callers that expect the underlying filesystem not
* to be NFS exported.
*
- * If the inode has been found through an idmapped mount the user namespace of
- * the vfsmount must be passed through @mnt_userns. This function will then take
- * care to map the inode according to @mnt_userns before checking permissions.
+ * If the inode has been found through an idmapped mount the idmap of
+ * the vfsmount must be passed through @idmap. This function will then take
+ * care to map the inode according to @idmap before checking permissions.
* On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs init_user_ns.
+ * raw inode simply passs @nop_mnt_idmap.
*/
-int vfs_unlink(struct user_namespace *mnt_userns, struct inode *dir,
+int vfs_unlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, struct inode **delegated_inode)
{
struct inode *target = dentry->d_inode;
- int error = may_delete(mnt_userns, dir, dentry, 0);
+ int error = may_delete(idmap, dir, dentry, 0);
if (error)
return error;
@@ -4304,7 +4302,6 @@ retry_deleg:
dentry = __lookup_hash(&last, path.dentry, lookup_flags);
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
- struct user_namespace *mnt_userns;
/* Why not before? Because we want correct error value */
if (last.name[last.len])
@@ -4316,9 +4313,8 @@ retry_deleg:
error = security_path_unlink(&path, dentry);
if (error)
goto exit3;
- mnt_userns = mnt_user_ns(path.mnt);
- error = vfs_unlink(mnt_userns, path.dentry->d_inode, dentry,
- &delegated_inode);
+ error = vfs_unlink(mnt_idmap(path.mnt), path.dentry->d_inode,
+ dentry, &delegated_inode);
exit3:
dput(dentry);
}
@@ -4370,24 +4366,25 @@ SYSCALL_DEFINE1(unlink, const char __user *, pathname)
/**
* vfs_symlink - create symlink
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @dir: inode of @dentry
* @dentry: pointer to dentry of the base directory
* @oldname: name of the file to link to
*
* Create a symlink.
*
- * If the inode has been found through an idmapped mount the user namespace of
- * the vfsmount must be passed through @mnt_userns. This function will then take
- * care to map the inode according to @mnt_userns before checking permissions.
+ * If the inode has been found through an idmapped mount the idmap of
+ * the vfsmount must be passed through @idmap. This function will then take
+ * care to map the inode according to @idmap before checking permissions.
* On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs init_user_ns.
+ * raw inode simply passs @nop_mnt_idmap.
*/
-int vfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+int vfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *oldname)
{
- int error = may_create(mnt_userns, dir, dentry);
+ int error;
+ error = may_create(idmap, dir, dentry);
if (error)
return error;
@@ -4398,7 +4395,7 @@ int vfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
if (error)
return error;
- error = dir->i_op->symlink(mnt_userns, dir, dentry, oldname);
+ error = dir->i_op->symlink(idmap, dir, dentry, oldname);
if (!error)
fsnotify_create(dir, dentry);
return error;
@@ -4423,13 +4420,9 @@ retry:
goto out_putnames;
error = security_path_symlink(&path, dentry, from->name);
- if (!error) {
- struct user_namespace *mnt_userns;
-
- mnt_userns = mnt_user_ns(path.mnt);
- error = vfs_symlink(mnt_userns, path.dentry->d_inode, dentry,
- from->name);
- }
+ if (!error)
+ error = vfs_symlink(mnt_idmap(path.mnt), path.dentry->d_inode,
+ dentry, from->name);
done_path_create(&path, dentry);
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
@@ -4455,7 +4448,7 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn
/**
* vfs_link - create a new link
* @old_dentry: object to be linked
- * @mnt_userns: the user namespace of the mount
+ * @idmap: idmap of the mount
* @dir: new parent
* @new_dentry: where to create the new link
* @delegated_inode: returns inode needing a delegation break
@@ -4472,13 +4465,13 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn
* be appropriate for callers that expect the underlying filesystem not
* to be NFS exported.
*
- * If the inode has been found through an idmapped mount the user namespace of
- * the vfsmount must be passed through @mnt_userns. This function will then take
- * care to map the inode according to @mnt_userns before checking permissions.
+ * If the inode has been found through an idmapped mount the idmap of
+ * the vfsmount must be passed through @idmap. This function will then take
+ * care to map the inode according to @idmap before checking permissions.
* On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs init_user_ns.
+ * raw inode simply passs @nop_mnt_idmap.
*/
-int vfs_link(struct dentry *old_dentry, struct user_namespace *mnt_userns,
+int vfs_link(struct dentry *old_dentry, struct mnt_idmap *idmap,
struct inode *dir, struct dentry *new_dentry,
struct inode **delegated_inode)
{
@@ -4489,7 +4482,7 @@ int vfs_link(struct dentry *old_dentry, struct user_namespace *mnt_userns,
if (!inode)
return -ENOENT;
- error = may_create(mnt_userns, dir, new_dentry);
+ error = may_create(idmap, dir, new_dentry);
if (error)
return error;
@@ -4506,7 +4499,7 @@ int vfs_link(struct dentry *old_dentry, struct user_namespace *mnt_userns,
* be writen back improperly if their true value is unknown to
* the vfs.
*/
- if (HAS_UNMAPPED_ID(mnt_userns, inode))
+ if (HAS_UNMAPPED_ID(idmap, inode))
return -EPERM;
if (!dir->i_op->link)
return -EPERM;
@@ -4553,7 +4546,7 @@ EXPORT_SYMBOL(vfs_link);
int do_linkat(int olddfd, struct filename *old, int newdfd,
struct filename *new, int flags)
{
- struct user_namespace *mnt_userns;
+ struct mnt_idmap *idmap;
struct dentry *new_dentry;
struct path old_path, new_path;
struct inode *delegated_inode = NULL;
@@ -4590,14 +4583,14 @@ retry:
error = -EXDEV;
if (old_path.mnt != new_path.mnt)
goto out_dput;
- mnt_userns = mnt_user_ns(new_path.mnt);
- error = may_linkat(mnt_userns, &old_path);
+ idmap = mnt_idmap(new_path.mnt);
+ error = may_linkat(idmap, &old_path);
if (unlikely(error))
goto out_dput;
error = security_path_link(old_path.dentry, &new_path, new_dentry);
if (error)
goto out_dput;
- error = vfs_link(old_path.dentry, mnt_userns, new_path.dentry->d_inode,
+ error = vfs_link(old_path.dentry, idmap, new_path.dentry->d_inode,
new_dentry, &delegated_inode);
out_dput:
done_path_create(&new_path, new_dentry);
@@ -4697,20 +4690,20 @@ int vfs_rename(struct renamedata *rd)
if (source == target)
return 0;
- error = may_delete(rd->old_mnt_userns, old_dir, old_dentry, is_dir);
+ error = may_delete(rd->old_mnt_idmap, old_dir, old_dentry, is_dir);
if (error)
return error;
if (!target) {
- error = may_create(rd->new_mnt_userns, new_dir, new_dentry);
+ error = may_create(rd->new_mnt_idmap, new_dir, new_dentry);
} else {
new_is_dir = d_is_dir(new_dentry);
if (!(flags & RENAME_EXCHANGE))
- error = may_delete(rd->new_mnt_userns, new_dir,
+ error = may_delete(rd->new_mnt_idmap, new_dir,
new_dentry, is_dir);
else
- error = may_delete(rd->new_mnt_userns, new_dir,
+ error = may_delete(rd->new_mnt_idmap, new_dir,
new_dentry, new_is_dir);
}
if (error)
@@ -4725,13 +4718,13 @@ int vfs_rename(struct renamedata *rd)
*/
if (new_dir != old_dir) {
if (is_dir) {
- error = inode_permission(rd->old_mnt_userns, source,
+ error = inode_permission(rd->old_mnt_idmap, source,
MAY_WRITE);
if (error)
return error;
}
if ((flags & RENAME_EXCHANGE) && new_is_dir) {
- error = inode_permission(rd->new_mnt_userns, target,
+ error = inode_permission(rd->new_mnt_idmap, target,
MAY_WRITE);
if (error)
return error;
@@ -4776,7 +4769,7 @@ int vfs_rename(struct renamedata *rd)
if (error)
goto out;
}
- error = old_dir->i_op->rename(rd->new_mnt_userns, old_dir, old_dentry,
+ error = old_dir->i_op->rename(rd->new_mnt_idmap, old_dir, old_dentry,
new_dir, new_dentry, flags);
if (error)
goto out;
@@ -4921,10 +4914,10 @@ retry_deleg:
rd.old_dir = old_path.dentry->d_inode;
rd.old_dentry = old_dentry;
- rd.old_mnt_userns = mnt_user_ns(old_path.mnt);
+ rd.old_mnt_idmap = mnt_idmap(old_path.mnt);
rd.new_dir = new_path.dentry->d_inode;
rd.new_dentry = new_dentry;
- rd.new_mnt_userns = mnt_user_ns(new_path.mnt);
+ rd.new_mnt_idmap = mnt_idmap(new_path.mnt);
rd.delegated_inode = &delegated_inode;
rd.flags = flags;
error = vfs_rename(&rd);
diff --git a/fs/namespace.c b/fs/namespace.c
index ab467ee58341..5927d90e24a0 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -75,22 +75,6 @@ static DECLARE_RWSEM(namespace_sem);
static HLIST_HEAD(unmounted); /* protected by namespace_sem */
static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
-struct mnt_idmap {
- struct user_namespace *owner;
- refcount_t count;
-};
-
-/*
- * Carries the initial idmapping of 0:0:4294967295 which is an identity
- * mapping. This means that {g,u}id 0 is mapped to {g,u}id 0, {g,u}id 1 is
- * mapped to {g,u}id 1, [...], {g,u}id 1000 to {g,u}id 1000, [...].
- */
-struct mnt_idmap nop_mnt_idmap = {
- .owner = &init_user_ns,
- .count = REFCOUNT_INIT(1),
-};
-EXPORT_SYMBOL_GPL(nop_mnt_idmap);
-
struct mount_kattr {
unsigned int attr_set;
unsigned int attr_clr;
@@ -210,104 +194,6 @@ int mnt_get_count(struct mount *mnt)
#endif
}
-/**
- * mnt_idmap_owner - retrieve owner of the mount's idmapping
- * @idmap: mount idmapping
- *
- * This helper will go away once the conversion to use struct mnt_idmap
- * everywhere has finished at which point the helper will be unexported.
- *
- * Only code that needs to perform permission checks based on the owner of the
- * idmapping will get access to it. All other code will solely rely on
- * idmappings. This will get us type safety so it's impossible to conflate
- * filesystems idmappings with mount idmappings.
- *
- * Return: The owner of the idmapping.
- */
-struct user_namespace *mnt_idmap_owner(const struct mnt_idmap *idmap)
-{
- return idmap->owner;
-}
-EXPORT_SYMBOL_GPL(mnt_idmap_owner);
-
-/**
- * mnt_user_ns - retrieve owner of an idmapped mount
- * @mnt: the relevant vfsmount
- *
- * This helper will go away once the conversion to use struct mnt_idmap
- * everywhere has finished at which point the helper will be unexported.
- *
- * Only code that needs to perform permission checks based on the owner of the
- * idmapping will get access to it. All other code will solely rely on
- * idmappings. This will get us type safety so it's impossible to conflate
- * filesystems idmappings with mount idmappings.
- *
- * Return: The owner of the idmapped.
- */
-struct user_namespace *mnt_user_ns(const struct vfsmount *mnt)
-{
- struct mnt_idmap *idmap = mnt_idmap(mnt);
-
- /* Return the actual owner of the filesystem instead of the nop. */
- if (idmap == &nop_mnt_idmap &&
- !initial_idmapping(mnt->mnt_sb->s_user_ns))
- return mnt->mnt_sb->s_user_ns;
- return mnt_idmap_owner(idmap);
-}
-EXPORT_SYMBOL_GPL(mnt_user_ns);
-
-/**
- * alloc_mnt_idmap - allocate a new idmapping for the mount
- * @mnt_userns: owning userns of the idmapping
- *
- * Allocate a new struct mnt_idmap which carries the idmapping of the mount.
- *
- * Return: On success a new idmap, on error an error pointer is returned.
- */
-static struct mnt_idmap *alloc_mnt_idmap(struct user_namespace *mnt_userns)
-{
- struct mnt_idmap *idmap;
-
- idmap = kzalloc(sizeof(struct mnt_idmap), GFP_KERNEL_ACCOUNT);
- if (!idmap)
- return ERR_PTR(-ENOMEM);
-
- idmap->owner = get_user_ns(mnt_userns);
- refcount_set(&idmap->count, 1);
- return idmap;
-}
-
-/**
- * mnt_idmap_get - get a reference to an idmapping
- * @idmap: the idmap to bump the reference on
- *
- * If @idmap is not the @nop_mnt_idmap bump the reference count.
- *
- * Return: @idmap with reference count bumped if @not_mnt_idmap isn't passed.
- */
-static inline struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap)
-{
- if (idmap != &nop_mnt_idmap)
- refcount_inc(&idmap->count);
-
- return idmap;
-}
-
-/**
- * mnt_idmap_put - put a reference to an idmapping
- * @idmap: the idmap to put the reference on
- *
- * If this is a non-initial idmapping, put the reference count when a mount is
- * released and free it if we're the last user.
- */
-static inline void mnt_idmap_put(struct mnt_idmap *idmap)
-{
- if (idmap != &nop_mnt_idmap && refcount_dec_and_test(&idmap->count)) {
- put_user_ns(idmap->owner);
- kfree(idmap);
- }
-}
-
static struct mount *alloc_vfsmnt(const char *name)
{
struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
@@ -4094,7 +3980,7 @@ static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
* Creating an idmapped mount with the filesystem wide idmapping
* doesn't make sense so block that. We don't allow mushy semantics.
*/
- if (mnt_idmap_owner(kattr->mnt_idmap) == fs_userns)
+ if (!check_fsmapping(kattr->mnt_idmap, m->mnt_sb))
return -EINVAL;
/*
@@ -4340,7 +4226,7 @@ static int build_mount_idmapped(const struct mount_attr *attr, size_t usize,
* result.
*/
mnt_userns = container_of(ns, struct user_namespace, ns);
- if (initial_idmapping(mnt_userns)) {
+ if (mnt_userns == &init_user_ns) {
err = -EPERM;
goto out_fput;
}
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 1ead5bd740c2..14a72224b657 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -209,8 +209,8 @@ config NFS_DISABLE_UDP_SUPPORT
config NFS_V4_2_READ_PLUS
bool "NFS: Enable support for the NFSv4.2 READ_PLUS operation"
depends on NFS_V4_2
- default y
+ default n
help
- Choose Y here to enable the use of READ_PLUS over NFS v4.2. READ_PLUS
- attempts to improve read performance by compressing out sparse holes
- in the file contents.
+ This is intended for developers only. The READ_PLUS operation has
+ been shown to have issues under specific conditions and should not
+ be used in production.
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index ea1ceffa1d3a..f8e420464b77 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2296,7 +2296,7 @@ EXPORT_SYMBOL_GPL(nfs_instantiate);
* that the operation succeeded on the server, but an error in the
* reply path made it appear to have failed.
*/
-int nfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+int nfs_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
struct iattr attr;
@@ -2325,7 +2325,7 @@ EXPORT_SYMBOL_GPL(nfs_create);
* See comments for nfs_proc_create regarding failed operations.
*/
int
-nfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+nfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct iattr attr;
@@ -2352,7 +2352,7 @@ EXPORT_SYMBOL_GPL(nfs_mknod);
/*
* See comments for nfs_proc_create regarding failed operations.
*/
-int nfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+int nfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
struct iattr attr;
@@ -2524,7 +2524,7 @@ EXPORT_SYMBOL_GPL(nfs_unlink);
* now have a new file handle and can instantiate an in-core NFS inode
* and move the raw page into its mapping.
*/
-int nfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+int nfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *symname)
{
struct page *page;
@@ -2642,7 +2642,7 @@ nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data)
* If these conditions are met, we can drop the dentries before doing
* the rename.
*/
-int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags)
{
@@ -2957,12 +2957,14 @@ static u64 nfs_access_login_time(const struct task_struct *task,
const struct cred *cred)
{
const struct task_struct *parent;
+ const struct cred *pcred;
u64 ret;
rcu_read_lock();
for (;;) {
parent = rcu_dereference(task->real_parent);
- if (parent == task || cred_fscmp(parent->cred, cred) != 0)
+ pcred = rcu_dereference(parent->cred);
+ if (parent == task || cred_fscmp(pcred, cred) != 0)
break;
task = parent;
}
@@ -3023,6 +3025,7 @@ static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cre
* but do it without locking.
*/
struct nfs_inode *nfsi = NFS_I(inode);
+ u64 login_time = nfs_access_login_time(current, cred);
struct nfs_access_entry *cache;
int err = -ECHILD;
struct list_head *lh;
@@ -3037,6 +3040,8 @@ static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cre
cache = NULL;
if (cache == NULL)
goto out;
+ if ((s64)(login_time - cache->timestamp) > 0)
+ goto out;
if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS))
goto out;
*mask = cache->mask;
@@ -3257,7 +3262,7 @@ static int nfs_execute_ok(struct inode *inode, int mask)
return ret;
}
-int nfs_permission(struct user_namespace *mnt_userns,
+int nfs_permission(struct mnt_idmap *idmap,
struct inode *inode,
int mask)
{
@@ -3308,7 +3313,7 @@ out_notsup:
res = nfs_revalidate_inode(inode, NFS_INO_INVALID_MODE |
NFS_INO_INVALID_OTHER);
if (res == 0)
- res = generic_permission(&init_user_ns, inode, mask);
+ res = generic_permission(&nop_mnt_idmap, inode, mask);
goto out;
}
EXPORT_SYMBOL_GPL(nfs_permission);
diff --git a/fs/nfs/export.c b/fs/nfs/export.c
index 01596f2d0a1e..1a9d5aa51dfb 100644
--- a/fs/nfs/export.c
+++ b/fs/nfs/export.c
@@ -145,17 +145,10 @@ out:
return parent;
}
-static u64 nfs_fetch_iversion(struct inode *inode)
-{
- nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE);
- return inode_peek_iversion_raw(inode);
-}
-
const struct export_operations nfs_export_ops = {
.encode_fh = nfs_encode_fh,
.fh_to_dentry = nfs_fh_to_dentry,
.get_parent = nfs_get_parent,
- .fetch_iversion = nfs_fetch_iversion,
.flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK|
EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS|
EXPORT_OP_NOATOMIC_ATTR,
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index d8ec889a4b3f..b0f3c9339e70 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -31,6 +31,7 @@
#include <linux/swap.h>
#include <linux/uaccess.h>
+#include <linux/filelock.h>
#include "delegation.h"
#include "internal.h"
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index ad34a33b0737..4974cd18ca46 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -783,6 +783,12 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
return &fl->generic_hdr;
}
+static bool
+filelayout_lseg_is_striped(const struct nfs4_filelayout_segment *flseg)
+{
+ return flseg->num_fh > 1;
+}
+
/*
* filelayout_pg_test(). Called by nfs_can_coalesce_requests()
*
@@ -803,6 +809,8 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
size = pnfs_generic_pg_test(pgio, prev, req);
if (!size)
return 0;
+ else if (!filelayout_lseg_is_striped(FILELAYOUT_LSEG(pgio->pg_lseg)))
+ return size;
/* see if req and prev are in the same stripe */
if (prev) {
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e98ee7599eeb..222a28320e1c 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -606,7 +606,7 @@ EXPORT_SYMBOL_GPL(nfs_fhget);
#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE|ATTR_OPEN)
int
-nfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
@@ -825,10 +825,12 @@ static u32 nfs_get_valid_attrmask(struct inode *inode)
reply_mask |= STATX_UID | STATX_GID;
if (!(cache_validity & NFS_INO_INVALID_BLOCKS))
reply_mask |= STATX_BLOCKS;
+ if (!(cache_validity & NFS_INO_INVALID_CHANGE))
+ reply_mask |= STATX_CHANGE_COOKIE;
return reply_mask;
}
-int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+int nfs_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
@@ -843,7 +845,8 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
request_mask &= STATX_TYPE | STATX_MODE | STATX_NLINK | STATX_UID |
STATX_GID | STATX_ATIME | STATX_MTIME | STATX_CTIME |
- STATX_INO | STATX_SIZE | STATX_BLOCKS;
+ STATX_INO | STATX_SIZE | STATX_BLOCKS | STATX_BTIME |
+ STATX_CHANGE_COOKIE;
if ((query_flags & AT_STATX_DONT_SYNC) && !force_sync) {
if (readdirplus_enabled)
@@ -851,8 +854,8 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
goto out_no_revalidate;
}
- /* Flush out writes to the server in order to update c/mtime. */
- if ((request_mask & (STATX_CTIME | STATX_MTIME)) &&
+ /* Flush out writes to the server in order to update c/mtime/version. */
+ if ((request_mask & (STATX_CTIME | STATX_MTIME | STATX_CHANGE_COOKIE)) &&
S_ISREG(inode->i_mode))
filemap_write_and_wait(inode->i_mapping);
@@ -872,7 +875,8 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
/* Is the user requesting attributes that might need revalidation? */
if (!(request_mask & (STATX_MODE|STATX_NLINK|STATX_ATIME|STATX_CTIME|
STATX_MTIME|STATX_UID|STATX_GID|
- STATX_SIZE|STATX_BLOCKS)))
+ STATX_SIZE|STATX_BLOCKS|
+ STATX_CHANGE_COOKIE)))
goto out_no_revalidate;
/* Check whether the cached attributes are stale */
@@ -908,8 +912,12 @@ out_no_revalidate:
/* Only return attributes that were revalidated. */
stat->result_mask = nfs_get_valid_attrmask(inode) | request_mask;
- generic_fillattr(&init_user_ns, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, inode, stat);
stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
+ stat->change_cookie = inode_peek_iversion_raw(inode);
+ stat->attributes_mask |= STATX_ATTR_CHANGE_MONOTONIC;
+ if (server->change_attr_type != NFS4_CHANGE_TYPE_IS_UNDEFINED)
+ stat->attributes |= STATX_ATTR_CHANGE_MONOTONIC;
if (S_ISDIR(inode->i_mode))
stat->blksize = NFS_SERVER(inode)->dtsize;
out:
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index ae7d4a8c728c..41468c21291d 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -384,18 +384,18 @@ extern unsigned long nfs_access_cache_scan(struct shrinker *shrink,
struct shrink_control *sc);
struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int);
void nfs_d_prune_case_insensitive_aliases(struct inode *inode);
-int nfs_create(struct user_namespace *, struct inode *, struct dentry *,
+int nfs_create(struct mnt_idmap *, struct inode *, struct dentry *,
umode_t, bool);
-int nfs_mkdir(struct user_namespace *, struct inode *, struct dentry *,
+int nfs_mkdir(struct mnt_idmap *, struct inode *, struct dentry *,
umode_t);
int nfs_rmdir(struct inode *, struct dentry *);
int nfs_unlink(struct inode *, struct dentry *);
-int nfs_symlink(struct user_namespace *, struct inode *, struct dentry *,
+int nfs_symlink(struct mnt_idmap *, struct inode *, struct dentry *,
const char *);
int nfs_link(struct dentry *, struct inode *, struct dentry *);
-int nfs_mknod(struct user_namespace *, struct inode *, struct dentry *, umode_t,
+int nfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, umode_t,
dev_t);
-int nfs_rename(struct user_namespace *, struct inode *, struct dentry *,
+int nfs_rename(struct mnt_idmap *, struct inode *, struct dentry *,
struct inode *, struct dentry *, unsigned int);
#ifdef CONFIG_NFS_V4_2
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index b0ef7e7ddb30..19d51ebf842c 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -208,23 +208,23 @@ out_fc:
}
static int
-nfs_namespace_getattr(struct user_namespace *mnt_userns,
+nfs_namespace_getattr(struct mnt_idmap *idmap,
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
if (NFS_FH(d_inode(path->dentry))->size != 0)
- return nfs_getattr(mnt_userns, path, stat, request_mask,
+ return nfs_getattr(idmap, path, stat, request_mask,
query_flags);
- generic_fillattr(&init_user_ns, d_inode(path->dentry), stat);
+ generic_fillattr(&nop_mnt_idmap, d_inode(path->dentry), stat);
return 0;
}
static int
-nfs_namespace_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+nfs_namespace_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
if (NFS_FH(d_inode(dentry))->size != 0)
- return nfs_setattr(mnt_userns, dentry, attr);
+ return nfs_setattr(idmap, dentry, attr);
return -EACCES;
}
diff --git a/fs/nfs/nfs3_fs.h b/fs/nfs/nfs3_fs.h
index df9ca56db347..4fa37dc038b5 100644
--- a/fs/nfs/nfs3_fs.h
+++ b/fs/nfs/nfs3_fs.h
@@ -12,7 +12,7 @@
*/
#ifdef CONFIG_NFS_V3_ACL
extern struct posix_acl *nfs3_get_acl(struct inode *inode, int type, bool rcu);
-extern int nfs3_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+extern int nfs3_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type);
extern int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
struct posix_acl *dfacl);
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 74d11e3c4205..1247f544a440 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -255,7 +255,7 @@ int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
}
-int nfs3_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+int nfs3_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type)
{
struct posix_acl *orig = acl, *dfacl = NULL, *alloc;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 5edd1704f735..4c9f8bd866ab 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -23,6 +23,7 @@
#define NFS4_MAX_LOOP_ON_RECOVER (10)
#include <linux/seqlock.h>
+#include <linux/filelock.h>
struct idmap;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 40d749f29ed3..d9c332019d06 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -7692,7 +7692,7 @@ nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp)
#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
static int nfs4_xattr_set_nfs4_acl(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *key, const void *buf,
size_t buflen, int flags)
@@ -7716,7 +7716,7 @@ static bool nfs4_xattr_list_nfs4_acl(struct dentry *dentry)
#define XATTR_NAME_NFSV4_DACL "system.nfs4_dacl"
static int nfs4_xattr_set_nfs4_dacl(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *key, const void *buf,
size_t buflen, int flags)
@@ -7739,7 +7739,7 @@ static bool nfs4_xattr_list_nfs4_dacl(struct dentry *dentry)
#define XATTR_NAME_NFSV4_SACL "system.nfs4_sacl"
static int nfs4_xattr_set_nfs4_sacl(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *key, const void *buf,
size_t buflen, int flags)
@@ -7764,7 +7764,7 @@ static bool nfs4_xattr_list_nfs4_sacl(struct dentry *dentry)
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
static int nfs4_xattr_set_nfs4_label(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *key, const void *buf,
size_t buflen, int flags)
@@ -7815,7 +7815,7 @@ nfs4_listxattr_nfs4_label(struct inode *inode, char *list, size_t list_len)
#ifdef CONFIG_NFS_V4_2
static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *key, const void *buf,
size_t buflen, int flags)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 16be6dae524f..779bfc37233c 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -21,6 +21,7 @@
#include <linux/nfs_page.h>
#include <linux/nfs_mount.h>
#include <linux/export.h>
+#include <linux/filelock.h>
#include "internal.h"
#include "pnfs.h"
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 80c240e50952..1a80d548253a 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -25,6 +25,7 @@
#include <linux/freezer.h>
#include <linux/wait.h>
#include <linux/iversion.h>
+#include <linux/filelock.h>
#include <linux/uaccess.h>
#include <linux/sched/mm.h>
diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c
index 0a9b72685f98..1479583fbb62 100644
--- a/fs/nfs_common/grace.c
+++ b/fs/nfs_common/grace.c
@@ -9,6 +9,7 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <linux/fs.h>
+#include <linux/filelock.h>
static unsigned int grace_net_id;
static DEFINE_SPINLOCK(grace_lock);
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 45b2c9e3f636..c0950edb26b0 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -662,6 +662,39 @@ static struct shrinker nfsd_file_shrinker = {
};
/**
+ * nfsd_file_cond_queue - conditionally unhash and queue a nfsd_file
+ * @nf: nfsd_file to attempt to queue
+ * @dispose: private list to queue successfully-put objects
+ *
+ * Unhash an nfsd_file, try to get a reference to it, and then put that
+ * reference. If it's the last reference, queue it to the dispose list.
+ */
+static void
+nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose)
+ __must_hold(RCU)
+{
+ int decrement = 1;
+
+ /* If we raced with someone else unhashing, ignore it */
+ if (!nfsd_file_unhash(nf))
+ return;
+
+ /* If we can't get a reference, ignore it */
+ if (!nfsd_file_get(nf))
+ return;
+
+ /* Extra decrement if we remove from the LRU */
+ if (nfsd_file_lru_remove(nf))
+ ++decrement;
+
+ /* If refcount goes to 0, then put on the dispose list */
+ if (refcount_sub_and_test(decrement, &nf->nf_ref)) {
+ list_add(&nf->nf_lru, dispose);
+ trace_nfsd_file_closing(nf);
+ }
+}
+
+/**
* nfsd_file_queue_for_close: try to close out any open nfsd_files for an inode
* @inode: inode on which to close out nfsd_files
* @dispose: list on which to gather nfsd_files to close out
@@ -688,30 +721,11 @@ nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose)
rcu_read_lock();
do {
- int decrement = 1;
-
nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
nfsd_file_rhash_params);
if (!nf)
break;
-
- /* If we raced with someone else unhashing, ignore it */
- if (!nfsd_file_unhash(nf))
- continue;
-
- /* If we can't get a reference, ignore it */
- if (!nfsd_file_get(nf))
- continue;
-
- /* Extra decrement if we remove from the LRU */
- if (nfsd_file_lru_remove(nf))
- ++decrement;
-
- /* If refcount goes to 0, then put on the dispose list */
- if (refcount_sub_and_test(decrement, &nf->nf_ref)) {
- list_add(&nf->nf_lru, dispose);
- trace_nfsd_file_closing(nf);
- }
+ nfsd_file_cond_queue(nf, dispose);
} while (1);
rcu_read_unlock();
}
@@ -928,11 +942,8 @@ __nfsd_file_cache_purge(struct net *net)
nf = rhashtable_walk_next(&iter);
while (!IS_ERR_OR_NULL(nf)) {
- if (!net || nf->nf_net == net) {
- nfsd_file_unhash(nf);
- nfsd_file_lru_remove(nf);
- list_add(&nf->nf_lru, &dispose);
- }
+ if (!net || nf->nf_net == net)
+ nfsd_file_cond_queue(nf, &dispose);
nf = rhashtable_walk_next(&iter);
}
@@ -1071,8 +1082,8 @@ nfsd_file_is_cached(struct inode *inode)
static __be32
nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
- unsigned int may_flags, struct nfsd_file **pnf,
- bool open, bool want_gc)
+ unsigned int may_flags, struct file *file,
+ struct nfsd_file **pnf, bool want_gc)
{
struct nfsd_file_lookup_key key = {
.type = NFSD_FILE_KEY_FULL,
@@ -1147,8 +1158,7 @@ wait_for_construction:
status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
out:
if (status == nfs_ok) {
- if (open)
- this_cpu_inc(nfsd_file_acquisitions);
+ this_cpu_inc(nfsd_file_acquisitions);
*pnf = nf;
} else {
if (refcount_dec_and_test(&nf->nf_ref))
@@ -1158,20 +1168,23 @@ out:
out_status:
put_cred(key.cred);
- if (open)
- trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status);
+ trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status);
return status;
open_file:
trace_nfsd_file_alloc(nf);
nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode);
if (nf->nf_mark) {
- if (open) {
+ if (file) {
+ get_file(file);
+ nf->nf_file = file;
+ status = nfs_ok;
+ trace_nfsd_file_opened(nf, status);
+ } else {
status = nfsd_open_verified(rqstp, fhp, may_flags,
&nf->nf_file);
trace_nfsd_file_open(nf, status);
- } else
- status = nfs_ok;
+ }
} else
status = nfserr_jukebox;
/*
@@ -1207,7 +1220,7 @@ __be32
nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **pnf)
{
- return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, true);
+ return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, true);
}
/**
@@ -1228,28 +1241,30 @@ __be32
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **pnf)
{
- return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, false);
+ return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, false);
}
/**
- * nfsd_file_create - Get a struct nfsd_file, do not open
+ * nfsd_file_acquire_opened - Get a struct nfsd_file using existing open file
* @rqstp: the RPC transaction being executed
* @fhp: the NFS filehandle of the file just created
* @may_flags: NFSD_MAY_ settings for the file
+ * @file: cached, already-open file (may be NULL)
* @pnf: OUT: new or found "struct nfsd_file" object
*
- * The nfsd_file_object returned by this API is reference-counted
- * but not garbage-collected. The object is released immediately
- * one RCU grace period after the final nfsd_file_put().
+ * Acquire a nfsd_file object that is not GC'ed. If one doesn't already exist,
+ * and @file is non-NULL, use it to instantiate a new nfsd_file instead of
+ * opening a new one.
*
* Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
* network byte order is returned.
*/
__be32
-nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
- unsigned int may_flags, struct nfsd_file **pnf)
+nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct file *file,
+ struct nfsd_file **pnf)
{
- return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false, false);
+ return nfsd_file_do_acquire(rqstp, fhp, may_flags, file, pnf, false);
}
/*
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
index b7efb2c3ddb1..41516a4263ea 100644
--- a/fs/nfsd/filecache.h
+++ b/fs/nfsd/filecache.h
@@ -60,7 +60,8 @@ __be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **nfp);
__be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **nfp);
-__be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
- unsigned int may_flags, struct nfsd_file **nfp);
+__be32 nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct file *file,
+ struct nfsd_file **nfp);
int nfsd_file_cache_stats_show(struct seq_file *m, void *v);
#endif /* _FS_NFSD_FILECACHE_H */
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 8c854ba3285b..ec49b200b797 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -10,6 +10,7 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <linux/filelock.h>
#include <linux/percpu_counter.h>
#include <linux/siphash.h>
@@ -195,7 +196,7 @@ struct nfsd_net {
atomic_t nfsd_courtesy_clients;
struct shrinker nfsd_client_shrinker;
- struct delayed_work nfsd_shrinker_work;
+ struct work_struct nfsd_shrinker_work;
};
/* Simple check to find out if a given net was properly initialized */
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 1457f59f447a..995cb2c90b1a 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -113,11 +113,11 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst *rqstp)
inode_lock(inode);
- error = set_posix_acl(&init_user_ns, fh->fh_dentry, ACL_TYPE_ACCESS,
+ error = set_posix_acl(&nop_mnt_idmap, fh->fh_dentry, ACL_TYPE_ACCESS,
argp->acl_access);
if (error)
goto out_drop_lock;
- error = set_posix_acl(&init_user_ns, fh->fh_dentry, ACL_TYPE_DEFAULT,
+ error = set_posix_acl(&nop_mnt_idmap, fh->fh_dentry, ACL_TYPE_DEFAULT,
argp->acl_default);
if (error)
goto out_drop_lock;
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 647108138e8a..887803735e2a 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -103,11 +103,11 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp)
inode_lock(inode);
- error = set_posix_acl(&init_user_ns, fh->fh_dentry, ACL_TYPE_ACCESS,
+ error = set_posix_acl(&nop_mnt_idmap, fh->fh_dentry, ACL_TYPE_ACCESS,
argp->acl_access);
if (error)
goto out_drop_lock;
- error = set_posix_acl(&init_user_ns, fh->fh_dentry, ACL_TYPE_DEFAULT,
+ error = set_posix_acl(&nop_mnt_idmap, fh->fh_dentry, ACL_TYPE_DEFAULT,
argp->acl_default);
out_drop_lock:
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index d01b29aba662..f41992ecd0d7 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -320,7 +320,7 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
iap->ia_mode &= ~current_umask();
fh_fill_pre_attrs(fhp);
- host_err = vfs_create(&init_user_ns, inode, child, iap->ia_mode, true);
+ host_err = vfs_create(&nop_mnt_idmap, inode, child, iap->ia_mode, true);
if (host_err < 0) {
status = nfserrno(host_err);
goto out;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index bd880d55f565..f189ba7995f5 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -937,7 +937,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
* the client wants us to do more in this compound:
*/
if (!nfsd4_last_compound_op(rqstp))
- __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
+ clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
/* check stateid */
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
@@ -1318,6 +1318,7 @@ try_again:
/* allow 20secs for mount/unmount for now - revisit */
if (signal_pending(current) ||
(schedule_timeout(20*HZ) == 0)) {
+ finish_wait(&nn->nfsd_ssc_waitq, &wait);
kfree(work);
return nfserr_eagain;
}
@@ -2607,12 +2608,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
cstate->minorversion = args->minorversion;
fh_init(current_fh, NFS4_FHSIZE);
fh_init(save_fh, NFS4_FHSIZE);
-
/*
* Don't use the deferral mechanism for NFSv4; compounds make it
* too hard to avoid non-idempotency problems.
*/
- __clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
+ clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
/*
* According to RFC3010, this takes precedence over all other errors.
@@ -2734,7 +2734,7 @@ encode_op:
out:
cstate->status = status;
/* Reset deferral mechanism for RPC deferrals */
- __set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
+ set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
return rpc_success;
}
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 78b8cd9651d5..3509e73abe1f 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -233,7 +233,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
* as well be forgiving and just succeed silently.
*/
goto out_put;
- status = vfs_mkdir(&init_user_ns, d_inode(dir), dentry, S_IRWXU);
+ status = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, S_IRWXU);
out_put:
dput(dentry);
out_unlock:
@@ -353,7 +353,7 @@ nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn)
status = -ENOENT;
if (d_really_is_negative(dentry))
goto out;
- status = vfs_rmdir(&init_user_ns, d_inode(dir), dentry);
+ status = vfs_rmdir(&nop_mnt_idmap, d_inode(dir), dentry);
out:
dput(dentry);
out_unlock:
@@ -443,7 +443,7 @@ purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
if (nfs4_has_reclaimed_state(name, nn))
goto out_free;
- status = vfs_rmdir(&init_user_ns, d_inode(parent), child);
+ status = vfs_rmdir(&nop_mnt_idmap, d_inode(parent), child);
if (status)
printk("failed to remove client recovery directory %pd\n",
child);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 7b2ee535ade8..c1684da6c01f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4411,7 +4411,7 @@ nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
if (!count)
count = atomic_long_read(&num_delegations);
if (count)
- mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0);
+ queue_work(laundry_wq, &nn->nfsd_shrinker_work);
return (unsigned long)count;
}
@@ -4421,7 +4421,7 @@ nfsd4_state_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
return SHRINK_STOP;
}
-int
+void
nfsd4_init_leases_net(struct nfsd_net *nn)
{
struct sysinfo si;
@@ -4443,16 +4443,6 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB);
atomic_set(&nn->nfsd_courtesy_clients, 0);
- nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan;
- nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count;
- nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
- return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
-}
-
-void
-nfsd4_leases_net_shutdown(struct nfsd_net *nn)
-{
- unregister_shrinker(&nn->nfsd_client_shrinker);
}
static void init_nfs4_replay(struct nfs4_replay *rp)
@@ -5262,18 +5252,10 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
if (!fp->fi_fds[oflag]) {
spin_unlock(&fp->fi_lock);
- if (!open->op_filp) {
- status = nfsd_file_acquire(rqstp, cur_fh, access, &nf);
- if (status != nfs_ok)
- goto out_put_access;
- } else {
- status = nfsd_file_create(rqstp, cur_fh, access, &nf);
- if (status != nfs_ok)
- goto out_put_access;
- nf->nf_file = open->op_filp;
- open->op_filp = NULL;
- trace_nfsd_file_create(rqstp, access, nf);
- }
+ status = nfsd_file_acquire_opened(rqstp, cur_fh, access,
+ open->op_filp, &nf);
+ if (status != nfs_ok)
+ goto out_put_access;
spin_lock(&fp->fi_lock);
if (!fp->fi_fds[oflag]) {
@@ -5374,7 +5356,7 @@ static int nfsd4_check_conflicting_opens(struct nfs4_client *clp,
{
struct nfs4_ol_stateid *st;
struct file *f = fp->fi_deleg_file->nf_file;
- struct inode *ino = locks_inode(f);
+ struct inode *ino = file_inode(f);
int writes;
writes = atomic_read(&ino->i_writecount);
@@ -6243,8 +6225,7 @@ deleg_reaper(struct nfsd_net *nn)
static void
nfsd4_state_shrinker_worker(struct work_struct *work)
{
- struct delayed_work *dwork = to_delayed_work(work);
- struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
+ struct nfsd_net *nn = container_of(work, struct nfsd_net,
nfsd_shrinker_work);
courtesy_client_reaper(nn);
@@ -7828,7 +7809,7 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
return status;
}
- inode = locks_inode(nf->nf_file);
+ inode = file_inode(nf->nf_file);
flctx = locks_inode_context(inode);
if (flctx && !list_empty_careful(&flctx->flc_posix)) {
@@ -8074,11 +8055,20 @@ static int nfs4_state_create_net(struct net *net)
INIT_LIST_HEAD(&nn->blocked_locks_lru);
INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
- INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker);
+ INIT_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker);
get_net(net);
+ nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan;
+ nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count;
+ nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
+
+ if (register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client"))
+ goto err_shrinker;
return 0;
+err_shrinker:
+ put_net(net);
+ kfree(nn->sessionid_hashtbl);
err_sessionid:
kfree(nn->unconf_id_hashtbl);
err_unconf_id:
@@ -8171,6 +8161,8 @@ nfs4_state_shutdown_net(struct net *net)
struct list_head *pos, *next, reaplist;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ unregister_shrinker(&nn->nfsd_client_shrinker);
+ cancel_work(&nn->nfsd_shrinker_work);
cancel_delayed_work_sync(&nn->laundromat_work);
locks_end_grace(&nn->nfsd4_manager);
@@ -8190,7 +8182,6 @@ nfs4_state_shutdown_net(struct net *net)
nfsd4_client_tracking_exit(net);
nfs4_state_destroy_net(net);
- rhltable_destroy(&nfs4_file_rhltable);
#ifdef CONFIG_NFSD_V4_2_INTER_SSC
nfsd4_ssc_shutdown_umount(nn);
#endif
@@ -8200,6 +8191,7 @@ void
nfs4_state_shutdown(void)
{
nfsd4_destroy_callback_queue();
+ rhltable_destroy(&nfs4_file_rhltable);
}
static void
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 2b4ae858c89b..e12e5a4ad502 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2523,7 +2523,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE;
if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack)
- __clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags);
+ clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags);
return true;
}
@@ -2965,7 +2965,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
goto out;
}
- err = vfs_getattr(&path, &stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
+ err = vfs_getattr(&path, &stat,
+ STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
+ AT_STATX_SYNC_AS_STAT);
if (err)
goto out_nfserr;
if (!(stat.result_mask & STATX_BTIME))
@@ -3629,6 +3631,17 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
case nfserr_noent:
xdr_truncate_encode(xdr, start_offset);
goto skip_entry;
+ case nfserr_jukebox:
+ /*
+ * The pseudoroot should only display dentries that lead to
+ * exports. If we get EJUKEBOX here, then we can't tell whether
+ * this entry should be included. Just fail the whole READDIR
+ * with NFS4ERR_DELAY in that case, and hope that the situation
+ * will resolve itself by the client's next attempt.
+ */
+ if (cd->rd_fhp->fh_export->ex_flags & NFSEXP_V4ROOT)
+ goto fail;
+ fallthrough;
default:
/*
* If the client requested the RDATTR_ERROR attribute,
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index d1e581a60480..c2577ee7ffb2 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1457,9 +1457,7 @@ static __net_init int nfsd_init_net(struct net *net)
goto out_idmap_error;
nn->nfsd_versions = NULL;
nn->nfsd4_minorversions = NULL;
- retval = nfsd4_init_leases_net(nn);
- if (retval)
- goto out_drc_error;
+ nfsd4_init_leases_net(nn);
retval = nfsd_reply_cache_init(nn);
if (retval)
goto out_cache_error;
@@ -1469,8 +1467,6 @@ static __net_init int nfsd_init_net(struct net *net)
return 0;
out_cache_error:
- nfsd4_leases_net_shutdown(nn);
-out_drc_error:
nfsd_idmap_shutdown(net);
out_idmap_error:
nfsd_export_shutdown(net);
@@ -1486,7 +1482,6 @@ static __net_exit void nfsd_exit_net(struct net *net)
nfsd_idmap_shutdown(net);
nfsd_export_shutdown(net);
nfsd_netns_free_versions(net_generic(net, nfsd_net_id));
- nfsd4_leases_net_shutdown(nn);
}
static struct pernet_operations nfsd_net_ops = {
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 93b42ef9ed91..fa0144a74267 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -504,8 +504,7 @@ extern void unregister_cld_notifier(void);
extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn);
#endif
-extern int nfsd4_init_leases_net(struct nfsd_net *nn);
-extern void nfsd4_leases_net_shutdown(struct nfsd_net *nn);
+extern void nfsd4_init_leases_net(struct nfsd_net *nn);
#else /* CONFIG_NFSD_V4 */
static inline int nfsd4_is_junction(struct dentry *dentry)
@@ -513,8 +512,7 @@ static inline int nfsd4_is_junction(struct dentry *dentry)
return 0;
}
-static inline int nfsd4_init_leases_net(struct nfsd_net *nn) { return 0; };
-static inline void nfsd4_leases_net_shutdown(struct nfsd_net *nn) {};
+static inline void nfsd4_init_leases_net(struct nfsd_net *nn) { };
#define register_cld_notifier() 0
#define unregister_cld_notifier() do { } while(0)
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 8c52b6c9d31a..ccd8485fee04 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -40,7 +40,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry)
/* make sure parents give x permission to user */
int err;
parent = dget_parent(tdentry);
- err = inode_permission(&init_user_ns,
+ err = inode_permission(&nop_mnt_idmap,
d_inode(parent), MAY_EXEC);
if (err < 0) {
dput(parent);
@@ -628,6 +628,10 @@ void fh_fill_pre_attrs(struct svc_fh *fhp)
stat.mtime = inode->i_mtime;
stat.ctime = inode->i_ctime;
stat.size = inode->i_size;
+ if (v4 && IS_I_VERSION(inode)) {
+ stat.change_cookie = inode_query_iversion(inode);
+ stat.result_mask |= STATX_CHANGE_COOKIE;
+ }
}
if (v4)
fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode);
@@ -659,6 +663,10 @@ void fh_fill_post_attrs(struct svc_fh *fhp)
if (err) {
fhp->fh_post_saved = false;
fhp->fh_post_attr.ctime = inode->i_ctime;
+ if (v4 && IS_I_VERSION(inode)) {
+ fhp->fh_post_attr.change_cookie = inode_query_iversion(inode);
+ fhp->fh_post_attr.result_mask |= STATX_CHANGE_COOKIE;
+ }
} else
fhp->fh_post_saved = true;
if (v4)
@@ -748,3 +756,37 @@ enum fsid_source fsid_source(const struct svc_fh *fhp)
return FSIDSOURCE_UUID;
return FSIDSOURCE_DEV;
}
+
+/*
+ * We could use i_version alone as the change attribute. However, i_version
+ * can go backwards on a regular file after an unclean shutdown. On its own
+ * that doesn't necessarily cause a problem, but if i_version goes backwards
+ * and then is incremented again it could reuse a value that was previously
+ * used before boot, and a client who queried the two values might incorrectly
+ * assume nothing changed.
+ *
+ * By using both ctime and the i_version counter we guarantee that as long as
+ * time doesn't go backwards we never reuse an old value. If the filesystem
+ * advertises STATX_ATTR_CHANGE_MONOTONIC, then this mitigation is not
+ * needed.
+ *
+ * We only need to do this for regular files as well. For directories, we
+ * assume that the new change attr is always logged to stable storage in some
+ * fashion before the results can be seen.
+ */
+u64 nfsd4_change_attribute(struct kstat *stat, struct inode *inode)
+{
+ u64 chattr;
+
+ if (stat->result_mask & STATX_CHANGE_COOKIE) {
+ chattr = stat->change_cookie;
+ if (S_ISREG(inode->i_mode) &&
+ !(stat->attributes & STATX_ATTR_CHANGE_MONOTONIC)) {
+ chattr += (u64)stat->ctime.tv_sec << 30;
+ chattr += stat->ctime.tv_nsec;
+ }
+ } else {
+ chattr = time_to_chattr(&stat->ctime);
+ }
+ return chattr;
+}
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index 513e028b0bbe..4e0ecf0ae2cf 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -293,34 +293,7 @@ static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp)
fhp->fh_pre_saved = false;
}
-/*
- * We could use i_version alone as the change attribute. However,
- * i_version can go backwards after a reboot. On its own that doesn't
- * necessarily cause a problem, but if i_version goes backwards and then
- * is incremented again it could reuse a value that was previously used
- * before boot, and a client who queried the two values might
- * incorrectly assume nothing changed.
- *
- * By using both ctime and the i_version counter we guarantee that as
- * long as time doesn't go backwards we never reuse an old value.
- */
-static inline u64 nfsd4_change_attribute(struct kstat *stat,
- struct inode *inode)
-{
- if (inode->i_sb->s_export_op->fetch_iversion)
- return inode->i_sb->s_export_op->fetch_iversion(inode);
- else if (IS_I_VERSION(inode)) {
- u64 chattr;
-
- chattr = stat->ctime.tv_sec;
- chattr <<= 30;
- chattr += stat->ctime.tv_nsec;
- chattr += inode_query_iversion(inode);
- return chattr;
- } else
- return time_to_chattr(&stat->ctime);
-}
-
+u64 nfsd4_change_attribute(struct kstat *stat, struct inode *inode);
extern void fh_fill_pre_attrs(struct svc_fh *fhp);
extern void fh_fill_post_attrs(struct svc_fh *fhp);
extern void fh_fill_both_attrs(struct svc_fh *fhp);
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index a5570cf75f3f..a82d91afdc9c 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -93,7 +93,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp)
if (delta < 0)
delta = -delta;
if (delta < MAX_TOUCH_TIME_ERROR &&
- setattr_prepare(&init_user_ns, fhp->fh_dentry, iap) != 0) {
+ setattr_prepare(&nop_mnt_idmap, fhp->fh_dentry, iap) != 0) {
/*
* Turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME.
* This will cause notify_change to set these times
@@ -211,7 +211,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
if (resp->status == nfs_ok)
resp->status = fh_getattr(&resp->fh, &resp->stat);
else if (resp->status == nfserr_jukebox)
- __set_bit(RQ_DROPME, &rqstp->rq_flags);
+ set_bit(RQ_DROPME, &rqstp->rq_flags);
return rpc_success;
}
@@ -246,7 +246,7 @@ nfsd_proc_write(struct svc_rqst *rqstp)
if (resp->status == nfs_ok)
resp->status = fh_getattr(&resp->fh, &resp->stat);
else if (resp->status == nfserr_jukebox)
- __set_bit(RQ_DROPME, &rqstp->rq_flags);
+ set_bit(RQ_DROPME, &rqstp->rq_flags);
return rpc_success;
}
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 56fba1cba3af..325d3d3f1211 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -453,8 +453,8 @@ static void nfsd_shutdown_net(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- nfsd_file_cache_shutdown_net(net);
nfs4_state_shutdown_net(net);
+ nfsd_file_cache_shutdown_net(net);
if (nn->lockd_up) {
lockd_down(net);
nn->lockd_up = false;
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index c852ae8eaf37..8f9c82d9e075 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -981,43 +981,6 @@ TRACE_EVENT(nfsd_file_acquire,
)
);
-TRACE_EVENT(nfsd_file_create,
- TP_PROTO(
- const struct svc_rqst *rqstp,
- unsigned int may_flags,
- const struct nfsd_file *nf
- ),
-
- TP_ARGS(rqstp, may_flags, nf),
-
- TP_STRUCT__entry(
- __field(const void *, nf_inode)
- __field(const void *, nf_file)
- __field(unsigned long, may_flags)
- __field(unsigned long, nf_flags)
- __field(unsigned long, nf_may)
- __field(unsigned int, nf_ref)
- __field(u32, xid)
- ),
-
- TP_fast_assign(
- __entry->nf_inode = nf->nf_inode;
- __entry->nf_file = nf->nf_file;
- __entry->may_flags = may_flags;
- __entry->nf_flags = nf->nf_flags;
- __entry->nf_may = nf->nf_may;
- __entry->nf_ref = refcount_read(&nf->nf_ref);
- __entry->xid = be32_to_cpu(rqstp->rq_xid);
- ),
-
- TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p",
- __entry->xid, __entry->nf_inode,
- show_nfsd_may_flags(__entry->may_flags),
- __entry->nf_ref, show_nf_flags(__entry->nf_flags),
- show_nfsd_may_flags(__entry->nf_may), __entry->nf_file
- )
-);
-
TRACE_EVENT(nfsd_file_insert_err,
TP_PROTO(
const struct svc_rqst *rqstp,
@@ -1079,8 +1042,8 @@ TRACE_EVENT(nfsd_file_cons_err,
)
);
-TRACE_EVENT(nfsd_file_open,
- TP_PROTO(struct nfsd_file *nf, __be32 status),
+DECLARE_EVENT_CLASS(nfsd_file_open_class,
+ TP_PROTO(const struct nfsd_file *nf, __be32 status),
TP_ARGS(nf, status),
TP_STRUCT__entry(
__field(void *, nf_inode) /* cannot be dereferenced */
@@ -1104,6 +1067,17 @@ TRACE_EVENT(nfsd_file_open,
__entry->nf_file)
)
+#define DEFINE_NFSD_FILE_OPEN_EVENT(name) \
+DEFINE_EVENT(nfsd_file_open_class, name, \
+ TP_PROTO( \
+ const struct nfsd_file *nf, \
+ __be32 status \
+ ), \
+ TP_ARGS(nf, status))
+
+DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_open);
+DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_opened);
+
TRACE_EVENT(nfsd_file_is_cached,
TP_PROTO(
const struct inode *inode,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 4c3a0d84043c..ab4ee3509ce3 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -426,7 +426,7 @@ static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap)
if (iap->ia_size < 0)
return -EFBIG;
- host_err = notify_change(&init_user_ns, dentry, &size_attr, NULL);
+ host_err = notify_change(&nop_mnt_idmap, dentry, &size_attr, NULL);
if (host_err)
return host_err;
iap->ia_valid &= ~ATTR_SIZE;
@@ -444,7 +444,7 @@ static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap)
return 0;
iap->ia_valid |= ATTR_CTIME;
- return notify_change(&init_user_ns, dentry, iap, NULL);
+ return notify_change(&nop_mnt_idmap, dentry, iap, NULL);
}
/**
@@ -542,12 +542,12 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
attr->na_labelerr = security_inode_setsecctx(dentry,
attr->na_seclabel->data, attr->na_seclabel->len);
if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && attr->na_pacl)
- attr->na_aclerr = set_posix_acl(&init_user_ns,
+ attr->na_aclerr = set_posix_acl(&nop_mnt_idmap,
dentry, ACL_TYPE_ACCESS,
attr->na_pacl);
if (IS_ENABLED(CONFIG_FS_POSIX_ACL) &&
!attr->na_aclerr && attr->na_dpacl && S_ISDIR(inode->i_mode))
- attr->na_aclerr = set_posix_acl(&init_user_ns,
+ attr->na_aclerr = set_posix_acl(&nop_mnt_idmap,
dentry, ACL_TYPE_DEFAULT,
attr->na_dpacl);
inode_unlock(inode);
@@ -583,7 +583,7 @@ int nfsd4_is_junction(struct dentry *dentry)
return 0;
if (!(inode->i_mode & S_ISVTX))
return 0;
- if (vfs_getxattr(&init_user_ns, dentry, NFSD_JUNCTION_XATTR_NAME,
+ if (vfs_getxattr(&nop_mnt_idmap, dentry, NFSD_JUNCTION_XATTR_NAME,
NULL, 0) <= 0)
return 0;
return 1;
@@ -1363,12 +1363,13 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
err = 0;
switch (type) {
case S_IFREG:
- host_err = vfs_create(&init_user_ns, dirp, dchild, iap->ia_mode, true);
+ host_err = vfs_create(&nop_mnt_idmap, dirp, dchild,
+ iap->ia_mode, true);
if (!host_err)
nfsd_check_ignore_resizing(iap);
break;
case S_IFDIR:
- host_err = vfs_mkdir(&init_user_ns, dirp, dchild, iap->ia_mode);
+ host_err = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode);
if (!host_err && unlikely(d_unhashed(dchild))) {
struct dentry *d;
d = lookup_one_len(dchild->d_name.name,
@@ -1396,7 +1397,7 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
case S_IFBLK:
case S_IFIFO:
case S_IFSOCK:
- host_err = vfs_mknod(&init_user_ns, dirp, dchild,
+ host_err = vfs_mknod(&nop_mnt_idmap, dirp, dchild,
iap->ia_mode, rdev);
break;
default:
@@ -1557,7 +1558,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
goto out_drop_write;
}
fh_fill_pre_attrs(fhp);
- host_err = vfs_symlink(&init_user_ns, d_inode(dentry), dnew, path);
+ host_err = vfs_symlink(&nop_mnt_idmap, d_inode(dentry), dnew, path);
err = nfserrno(host_err);
cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
if (!err)
@@ -1625,7 +1626,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
if (d_really_is_negative(dold))
goto out_dput;
fh_fill_pre_attrs(ffhp);
- host_err = vfs_link(dold, &init_user_ns, dirp, dnew, NULL);
+ host_err = vfs_link(dold, &nop_mnt_idmap, dirp, dnew, NULL);
fh_fill_post_attrs(ffhp);
inode_unlock(dirp);
if (!host_err) {
@@ -1745,10 +1746,10 @@ retry:
goto out_dput_old;
} else {
struct renamedata rd = {
- .old_mnt_userns = &init_user_ns,
+ .old_mnt_idmap = &nop_mnt_idmap,
.old_dir = fdir,
.old_dentry = odentry,
- .new_mnt_userns = &init_user_ns,
+ .new_mnt_idmap = &nop_mnt_idmap,
.new_dir = tdir,
.new_dentry = ndentry,
};
@@ -1850,14 +1851,14 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
nfsd_close_cached_files(rdentry);
for (retries = 1;;) {
- host_err = vfs_unlink(&init_user_ns, dirp, rdentry, NULL);
+ host_err = vfs_unlink(&nop_mnt_idmap, dirp, rdentry, NULL);
if (host_err != -EAGAIN || !retries--)
break;
if (!nfsd_wait_for_delegreturn(rqstp, rinode))
break;
}
} else {
- host_err = vfs_rmdir(&init_user_ns, dirp, rdentry);
+ host_err = vfs_rmdir(&nop_mnt_idmap, dirp, rdentry);
}
fh_fill_post_attrs(fhp);
@@ -2129,7 +2130,7 @@ nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name,
inode_lock_shared(inode);
- len = vfs_getxattr(&init_user_ns, dentry, name, NULL, 0);
+ len = vfs_getxattr(&nop_mnt_idmap, dentry, name, NULL, 0);
/*
* Zero-length attribute, just return.
@@ -2156,7 +2157,7 @@ nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name,
goto out;
}
- len = vfs_getxattr(&init_user_ns, dentry, name, buf, len);
+ len = vfs_getxattr(&nop_mnt_idmap, dentry, name, buf, len);
if (len <= 0) {
kvfree(buf);
buf = NULL;
@@ -2267,7 +2268,7 @@ nfsd_removexattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name)
inode_lock(fhp->fh_dentry->d_inode);
fh_fill_pre_attrs(fhp);
- ret = __vfs_removexattr_locked(&init_user_ns, fhp->fh_dentry,
+ ret = __vfs_removexattr_locked(&nop_mnt_idmap, fhp->fh_dentry,
name, NULL);
fh_fill_post_attrs(fhp);
@@ -2294,7 +2295,7 @@ nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name,
inode_lock(fhp->fh_dentry->d_inode);
fh_fill_pre_attrs(fhp);
- ret = __vfs_setxattr_locked(&init_user_ns, fhp->fh_dentry, name, buf,
+ ret = __vfs_setxattr_locked(&nop_mnt_idmap, fhp->fh_dentry, name, buf,
len, flags, NULL);
fh_fill_post_attrs(fhp);
inode_unlock(fhp->fh_dentry->d_inode);
@@ -2378,14 +2379,14 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
return 0;
/* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */
- err = inode_permission(&init_user_ns, inode,
+ err = inode_permission(&nop_mnt_idmap, inode,
acc & (MAY_READ | MAY_WRITE | MAY_EXEC));
/* Allow read access to binaries even when mode 111 */
if (err == -EACCES && S_ISREG(inode->i_mode) &&
(acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE) ||
acc == (NFSD_MAY_READ | NFSD_MAY_READ_IF_EXEC)))
- err = inode_permission(&init_user_ns, inode, MAY_EXEC);
+ err = inode_permission(&nop_mnt_idmap, inode, MAY_EXEC);
return err? nfserrno(err) : 0;
}
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index dbdfef7ae85b..43fb57a301d3 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -170,9 +170,14 @@ static inline void fh_drop_write(struct svc_fh *fh)
static inline __be32 fh_getattr(const struct svc_fh *fh, struct kstat *stat)
{
+ u32 request_mask = STATX_BASIC_STATS;
struct path p = {.mnt = fh->fh_export->ex_path.mnt,
.dentry = fh->fh_dentry};
- return nfserrno(vfs_getattr(&p, stat, STATX_BASIC_STATS,
+
+ if (fh->fh_maxsize == NFS4_FHSIZE)
+ request_mask |= (STATX_BTIME | STATX_CHANGE_COOKIE);
+
+ return nfserrno(vfs_getattr(&p, stat, request_mask,
AT_STATX_SYNC_AS_STAT));
}
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index b9d15c3df3cc..40ce92a332fe 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -480,9 +480,18 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, &bh,
&submit_ptr);
if (ret) {
- if (ret != -EEXIST)
- return ret;
- goto out_check;
+ if (likely(ret == -EEXIST))
+ goto out_check;
+ if (ret == -ENOENT) {
+ /*
+ * Block address translation failed due to invalid
+ * value of 'ptr'. In this case, return internal code
+ * -EINVAL (broken bmap) to notify bmap layer of fatal
+ * metadata corruption.
+ */
+ ret = -EINVAL;
+ }
+ return ret;
}
if (ra) {
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 232dd7b6cca1..1310d2d5feb3 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -364,7 +364,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
ii->i_bh = bh;
atomic64_inc(&root->inodes_count);
- inode_init_owner(&init_user_ns, inode, dir, mode);
+ inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
inode->i_ino = ino;
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
@@ -949,7 +949,7 @@ void nilfs_evict_inode(struct inode *inode)
*/
}
-int nilfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int nilfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *iattr)
{
struct nilfs_transaction_info ti;
@@ -957,7 +957,7 @@ int nilfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
struct super_block *sb = inode->i_sb;
int err;
- err = setattr_prepare(&init_user_ns, dentry, iattr);
+ err = setattr_prepare(&nop_mnt_idmap, dentry, iattr);
if (err)
return err;
@@ -972,7 +972,7 @@ int nilfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
nilfs_truncate(inode);
}
- setattr_copy(&init_user_ns, inode, iattr);
+ setattr_copy(&nop_mnt_idmap, inode, iattr);
mark_inode_dirty(inode);
if (iattr->ia_valid & ATTR_MODE) {
@@ -988,7 +988,7 @@ out_err:
return err;
}
-int nilfs_permission(struct user_namespace *mnt_userns, struct inode *inode,
+int nilfs_permission(struct mnt_idmap *idmap, struct inode *inode,
int mask)
{
struct nilfs_root *root = NILFS_I(inode)->i_root;
@@ -997,7 +997,7 @@ int nilfs_permission(struct user_namespace *mnt_userns, struct inode *inode,
root->cno != NILFS_CPTREE_CURRENT_CNO)
return -EROFS; /* snapshot is not writable */
- return generic_permission(&init_user_ns, inode, mask);
+ return generic_permission(&nop_mnt_idmap, inode, mask);
}
int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 87e1004b606d..5ccc638ae92f 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -128,7 +128,7 @@ int nilfs_fileattr_get(struct dentry *dentry, struct fileattr *fa)
/**
* nilfs_fileattr_set - ioctl to support chattr
*/
-int nilfs_fileattr_set(struct user_namespace *mnt_userns,
+int nilfs_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa)
{
struct inode *inode = d_inode(dentry);
@@ -1114,7 +1114,14 @@ static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp)
minseg = range[0] + segbytes - 1;
do_div(minseg, segbytes);
+
+ if (range[1] < 4096)
+ goto out;
+
maxseg = NILFS_SB2_OFFSET_BYTES(range[1]);
+ if (maxseg < segbytes)
+ goto out;
+
do_div(maxseg, segbytes);
maxseg--;
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 23899e0ae850..c7024da8f1e2 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -72,7 +72,7 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
* If the create succeeds, we fill in the inode information
* with d_instantiate().
*/
-static int nilfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int nilfs_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
struct inode *inode;
@@ -100,7 +100,7 @@ static int nilfs_create(struct user_namespace *mnt_userns, struct inode *dir,
}
static int
-nilfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+nilfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct inode *inode;
@@ -125,7 +125,7 @@ nilfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
return err;
}
-static int nilfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+static int nilfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *symname)
{
struct nilfs_transaction_info ti;
@@ -202,7 +202,7 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir,
return err;
}
-static int nilfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int nilfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
struct inode *inode;
@@ -340,7 +340,7 @@ static int nilfs_rmdir(struct inode *dir, struct dentry *dentry)
return err;
}
-static int nilfs_rename(struct user_namespace *mnt_userns,
+static int nilfs_rename(struct mnt_idmap *idmap,
struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index aecda4fc95f5..8046490cd7fe 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -242,7 +242,7 @@ extern int nilfs_sync_file(struct file *, loff_t, loff_t, int);
/* ioctl.c */
int nilfs_fileattr_get(struct dentry *dentry, struct fileattr *m);
-int nilfs_fileattr_set(struct user_namespace *mnt_userns,
+int nilfs_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa);
long nilfs_ioctl(struct file *, unsigned int, unsigned long);
long nilfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
@@ -271,10 +271,10 @@ struct inode *nilfs_iget_for_shadow(struct inode *inode);
extern void nilfs_update_inode(struct inode *, struct buffer_head *, int);
extern void nilfs_truncate(struct inode *);
extern void nilfs_evict_inode(struct inode *);
-extern int nilfs_setattr(struct user_namespace *, struct dentry *,
+extern int nilfs_setattr(struct mnt_idmap *, struct dentry *,
struct iattr *);
extern void nilfs_write_failed(struct address_space *mapping, loff_t to);
-int nilfs_permission(struct user_namespace *mnt_userns, struct inode *inode,
+int nilfs_permission(struct mnt_idmap *idmap, struct inode *inode,
int mask);
int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh);
extern int nilfs_inode_dirty(struct inode *);
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 6edb6e0dd61f..1422b8ba24ed 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -409,6 +409,15 @@ int nilfs_resize_fs(struct super_block *sb, __u64 newsize)
goto out;
/*
+ * Prevent underflow in second superblock position calculation.
+ * The exact minimum size check is done in nilfs_sufile_resize().
+ */
+ if (newsize < 4096) {
+ ret = -ENOSPC;
+ goto out;
+ }
+
+ /*
* Write lock is required to protect some functions depending
* on the number of segments, the number of reserved segments,
* and so forth.
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 2064e6473d30..3a4c9c150cbf 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -544,9 +544,15 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
{
struct nilfs_super_block **sbp = nilfs->ns_sbp;
struct buffer_head **sbh = nilfs->ns_sbh;
- u64 sb2off = NILFS_SB2_OFFSET_BYTES(bdev_nr_bytes(nilfs->ns_bdev));
+ u64 sb2off, devsize = bdev_nr_bytes(nilfs->ns_bdev);
int valid[2], swp = 0;
+ if (devsize < NILFS_SEG_MIN_BLOCKS * NILFS_MIN_BLOCK_SIZE + 4096) {
+ nilfs_err(sb, "device size too small");
+ return -EINVAL;
+ }
+ sb2off = NILFS_SB2_OFFSET_BYTES(devsize);
+
sbp[0] = nilfs_read_super_block(sb, NILFS_SB_OFFSET_BYTES, blocksize,
&sbh[0]);
sbp[1] = nilfs_read_super_block(sb, sb2off, blocksize, &sbh[1]);
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index a2a15bc4df28..29bdd99b29fa 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -262,7 +262,7 @@ static int fanotify_get_response(struct fsnotify_group *group,
}
/* userspace responded, convert to something usable */
- switch (event->response & ~FAN_AUDIT) {
+ switch (event->response & FANOTIFY_RESPONSE_ACCESS) {
case FAN_ALLOW:
ret = 0;
break;
@@ -273,7 +273,8 @@ static int fanotify_get_response(struct fsnotify_group *group,
/* Check if the response should be audited */
if (event->response & FAN_AUDIT)
- audit_fanotify(event->response & ~FAN_AUDIT);
+ audit_fanotify(event->response & ~FAN_AUDIT,
+ &event->audit_rule);
pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__,
group, event, ret);
@@ -563,6 +564,9 @@ static struct fanotify_event *fanotify_alloc_perm_event(const struct path *path,
pevent->fae.type = FANOTIFY_EVENT_TYPE_PATH_PERM;
pevent->response = 0;
+ pevent->hdr.type = FAN_RESPONSE_INFO_NONE;
+ pevent->hdr.pad = 0;
+ pevent->hdr.len = 0;
pevent->state = FAN_EVENT_INIT;
pevent->path = *path;
path_get(path);
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index 57f51a9a3015..e8a3c28c5d12 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -425,9 +425,13 @@ FANOTIFY_PE(struct fanotify_event *event)
struct fanotify_perm_event {
struct fanotify_event fae;
struct path path;
- unsigned short response; /* userspace answer to the event */
+ u32 response; /* userspace answer to the event */
unsigned short state; /* state of the event */
int fd; /* fd we passed to userspace for this event */
+ union {
+ struct fanotify_response_info_header hdr;
+ struct fanotify_response_info_audit_rule audit_rule;
+ };
};
static inline struct fanotify_perm_event *
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 4546da4a54f9..8f430bfad487 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -283,19 +283,42 @@ static int create_fd(struct fsnotify_group *group, const struct path *path,
return client_fd;
}
+static int process_access_response_info(const char __user *info,
+ size_t info_len,
+ struct fanotify_response_info_audit_rule *friar)
+{
+ if (info_len != sizeof(*friar))
+ return -EINVAL;
+
+ if (copy_from_user(friar, info, sizeof(*friar)))
+ return -EFAULT;
+
+ if (friar->hdr.type != FAN_RESPONSE_INFO_AUDIT_RULE)
+ return -EINVAL;
+ if (friar->hdr.pad != 0)
+ return -EINVAL;
+ if (friar->hdr.len != sizeof(*friar))
+ return -EINVAL;
+
+ return info_len;
+}
+
/*
* Finish processing of permission event by setting it to ANSWERED state and
* drop group->notification_lock.
*/
static void finish_permission_event(struct fsnotify_group *group,
- struct fanotify_perm_event *event,
- unsigned int response)
+ struct fanotify_perm_event *event, u32 response,
+ struct fanotify_response_info_audit_rule *friar)
__releases(&group->notification_lock)
{
bool destroy = false;
assert_spin_locked(&group->notification_lock);
- event->response = response;
+ event->response = response & ~FAN_INFO;
+ if (response & FAN_INFO)
+ memcpy(&event->audit_rule, friar, sizeof(*friar));
+
if (event->state == FAN_EVENT_CANCELED)
destroy = true;
else
@@ -306,20 +329,27 @@ static void finish_permission_event(struct fsnotify_group *group,
}
static int process_access_response(struct fsnotify_group *group,
- struct fanotify_response *response_struct)
+ struct fanotify_response *response_struct,
+ const char __user *info,
+ size_t info_len)
{
struct fanotify_perm_event *event;
int fd = response_struct->fd;
- int response = response_struct->response;
+ u32 response = response_struct->response;
+ int ret = info_len;
+ struct fanotify_response_info_audit_rule friar;
- pr_debug("%s: group=%p fd=%d response=%d\n", __func__, group,
- fd, response);
+ pr_debug("%s: group=%p fd=%d response=%u buf=%p size=%zu\n", __func__,
+ group, fd, response, info, info_len);
/*
* make sure the response is valid, if invalid we do nothing and either
* userspace can send a valid response or we will clean it up after the
* timeout
*/
- switch (response & ~FAN_AUDIT) {
+ if (response & ~FANOTIFY_RESPONSE_VALID_MASK)
+ return -EINVAL;
+
+ switch (response & FANOTIFY_RESPONSE_ACCESS) {
case FAN_ALLOW:
case FAN_DENY:
break;
@@ -327,10 +357,20 @@ static int process_access_response(struct fsnotify_group *group,
return -EINVAL;
}
- if (fd < 0)
+ if ((response & FAN_AUDIT) && !FAN_GROUP_FLAG(group, FAN_ENABLE_AUDIT))
return -EINVAL;
- if ((response & FAN_AUDIT) && !FAN_GROUP_FLAG(group, FAN_ENABLE_AUDIT))
+ if (response & FAN_INFO) {
+ ret = process_access_response_info(info, info_len, &friar);
+ if (ret < 0)
+ return ret;
+ if (fd == FAN_NOFD)
+ return ret;
+ } else {
+ ret = 0;
+ }
+
+ if (fd < 0)
return -EINVAL;
spin_lock(&group->notification_lock);
@@ -340,9 +380,9 @@ static int process_access_response(struct fsnotify_group *group,
continue;
list_del_init(&event->fae.fse.list);
- finish_permission_event(group, event, response);
+ finish_permission_event(group, event, response, &friar);
wake_up(&group->fanotify_data.access_waitq);
- return 0;
+ return ret;
}
spin_unlock(&group->notification_lock);
@@ -804,7 +844,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
if (ret <= 0) {
spin_lock(&group->notification_lock);
finish_permission_event(group,
- FANOTIFY_PERM(event), FAN_DENY);
+ FANOTIFY_PERM(event), FAN_DENY, NULL);
wake_up(&group->fanotify_data.access_waitq);
} else {
spin_lock(&group->notification_lock);
@@ -827,28 +867,32 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
{
- struct fanotify_response response = { .fd = -1, .response = -1 };
+ struct fanotify_response response;
struct fsnotify_group *group;
int ret;
+ const char __user *info_buf = buf + sizeof(struct fanotify_response);
+ size_t info_len;
if (!IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS))
return -EINVAL;
group = file->private_data;
+ pr_debug("%s: group=%p count=%zu\n", __func__, group, count);
+
if (count < sizeof(response))
return -EINVAL;
- count = sizeof(response);
-
- pr_debug("%s: group=%p count=%zu\n", __func__, group, count);
-
- if (copy_from_user(&response, buf, count))
+ if (copy_from_user(&response, buf, sizeof(response)))
return -EFAULT;
- ret = process_access_response(group, &response);
+ info_len = count - sizeof(response);
+
+ ret = process_access_response(group, &response, info_buf, info_len);
if (ret < 0)
count = ret;
+ else
+ count = sizeof(response) + ret;
return count;
}
@@ -876,7 +920,7 @@ static int fanotify_release(struct inode *ignored, struct file *file)
event = list_first_entry(&group->fanotify_data.access_list,
struct fanotify_perm_event, fae.fse.list);
list_del_init(&event->fae.fse.list);
- finish_permission_event(group, event, FAN_ALLOW);
+ finish_permission_event(group, event, FAN_ALLOW, NULL);
spin_lock(&group->notification_lock);
}
@@ -893,7 +937,7 @@ static int fanotify_release(struct inode *ignored, struct file *file)
fsnotify_destroy_event(group, fsn_event);
} else {
finish_permission_event(group, FANOTIFY_PERM(event),
- FAN_ALLOW);
+ FAN_ALLOW, NULL);
}
spin_lock(&group->notification_lock);
}
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 08c659332e26..e6fc5f7cb1d7 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -2865,7 +2865,7 @@ void ntfs_truncate_vfs(struct inode *vi) {
/**
* ntfs_setattr - called from notify_change() when an attribute is being changed
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @dentry: dentry whose attributes to change
* @attr: structure describing the attributes and the changes
*
@@ -2878,14 +2878,14 @@ void ntfs_truncate_vfs(struct inode *vi) {
*
* Called with ->i_mutex held.
*/
-int ntfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int ntfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
struct inode *vi = d_inode(dentry);
int err;
unsigned int ia_valid = attr->ia_valid;
- err = setattr_prepare(&init_user_ns, dentry, attr);
+ err = setattr_prepare(&nop_mnt_idmap, dentry, attr);
if (err)
goto out;
/* We do not support NTFS ACLs yet. */
diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h
index 6f78ee00f57f..147ef4ddb691 100644
--- a/fs/ntfs/inode.h
+++ b/fs/ntfs/inode.h
@@ -289,7 +289,7 @@ extern int ntfs_show_options(struct seq_file *sf, struct dentry *root);
extern int ntfs_truncate(struct inode *vi);
extern void ntfs_truncate_vfs(struct inode *vi);
-extern int ntfs_setattr(struct user_namespace *mnt_userns,
+extern int ntfs_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *attr);
extern int __ntfs_write_inode(struct inode *vi, int sync);
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index e5399ebc3a2b..e9bdc1ff08c9 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -70,7 +70,7 @@ static long ntfs_compat_ioctl(struct file *filp, u32 cmd, unsigned long arg)
/*
* ntfs_getattr - inode_operations::getattr
*/
-int ntfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, u32 flags)
{
struct inode *inode = d_inode(path->dentry);
@@ -84,7 +84,7 @@ int ntfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
stat->attributes_mask |= STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED;
- generic_fillattr(mnt_userns, inode, stat);
+ generic_fillattr(idmap, inode, stat);
stat->result_mask |= STATX_BTIME;
stat->btime = ni->i_crtime;
@@ -390,10 +390,10 @@ static int ntfs_truncate(struct inode *inode, loff_t new_size)
new_valid = ntfs_up_block(sb, min_t(u64, ni->i_valid, new_size));
- ni_lock(ni);
-
truncate_setsize(inode, new_size);
+ ni_lock(ni);
+
down_write(&ni->file.run_lock);
err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size,
&new_valid, ni->mi.sbi->options->prealloc, NULL);
@@ -657,7 +657,7 @@ out:
/*
* ntfs3_setattr - inode_operations::setattr
*/
-int ntfs3_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int ntfs3_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
struct super_block *sb = dentry->d_sb;
@@ -676,7 +676,7 @@ int ntfs3_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
ia_valid = attr->ia_valid;
}
- err = setattr_prepare(mnt_userns, dentry, attr);
+ err = setattr_prepare(idmap, dentry, attr);
if (err)
goto out;
@@ -704,10 +704,10 @@ int ntfs3_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
inode->i_size = newsize;
}
- setattr_copy(mnt_userns, inode, attr);
+ setattr_copy(idmap, inode, attr);
if (mode != inode->i_mode) {
- err = ntfs_acl_chmod(mnt_userns, dentry);
+ err = ntfs_acl_chmod(idmap, dentry);
if (err)
goto out;
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
index 20b953871574..8ce2616b087f 100644
--- a/fs/ntfs3/inode.c
+++ b/fs/ntfs3/inode.c
@@ -1185,7 +1185,7 @@ out:
*
* NOTE: if fnd != NULL (ntfs_atomic_open) then @dir is locked
*/
-struct inode *ntfs_create_inode(struct user_namespace *mnt_userns,
+struct inode *ntfs_create_inode(struct mnt_idmap *idmap,
struct inode *dir, struct dentry *dentry,
const struct cpu_str *uni, umode_t mode,
dev_t dev, const char *symname, u32 size,
@@ -1307,7 +1307,7 @@ struct inode *ntfs_create_inode(struct user_namespace *mnt_userns,
goto out3;
}
inode = &ni->vfs_inode;
- inode_init_owner(mnt_userns, inode, dir, mode);
+ inode_init_owner(idmap, inode, dir, mode);
mode = inode->i_mode;
inode->i_atime = inode->i_mtime = inode->i_ctime = ni->i_crtime =
@@ -1614,7 +1614,7 @@ struct inode *ntfs_create_inode(struct user_namespace *mnt_userns,
#ifdef CONFIG_NTFS3_FS_POSIX_ACL
if (!S_ISLNK(mode) && (sb->s_flags & SB_POSIXACL)) {
- err = ntfs_init_acl(mnt_userns, inode, dir);
+ err = ntfs_init_acl(idmap, inode, dir);
if (err)
goto out7;
} else
diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c
index c8db35e2ae17..407fe92394e2 100644
--- a/fs/ntfs3/namei.c
+++ b/fs/ntfs3/namei.c
@@ -94,12 +94,12 @@ static struct dentry *ntfs_lookup(struct inode *dir, struct dentry *dentry,
/*
* ntfs_create - inode_operations::create
*/
-static int ntfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int ntfs_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
struct inode *inode;
- inode = ntfs_create_inode(mnt_userns, dir, dentry, NULL, S_IFREG | mode,
+ inode = ntfs_create_inode(idmap, dir, dentry, NULL, S_IFREG | mode,
0, NULL, 0, NULL);
return IS_ERR(inode) ? PTR_ERR(inode) : 0;
@@ -110,12 +110,12 @@ static int ntfs_create(struct user_namespace *mnt_userns, struct inode *dir,
*
* inode_operations::mknod
*/
-static int ntfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+static int ntfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct inode *inode;
- inode = ntfs_create_inode(mnt_userns, dir, dentry, NULL, mode, rdev,
+ inode = ntfs_create_inode(idmap, dir, dentry, NULL, mode, rdev,
NULL, 0, NULL);
return IS_ERR(inode) ? PTR_ERR(inode) : 0;
@@ -183,13 +183,13 @@ static int ntfs_unlink(struct inode *dir, struct dentry *dentry)
/*
* ntfs_symlink - inode_operations::symlink
*/
-static int ntfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+static int ntfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *symname)
{
u32 size = strlen(symname);
struct inode *inode;
- inode = ntfs_create_inode(mnt_userns, dir, dentry, NULL, S_IFLNK | 0777,
+ inode = ntfs_create_inode(idmap, dir, dentry, NULL, S_IFLNK | 0777,
0, symname, size, NULL);
return IS_ERR(inode) ? PTR_ERR(inode) : 0;
@@ -198,12 +198,12 @@ static int ntfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
/*
* ntfs_mkdir- inode_operations::mkdir
*/
-static int ntfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int ntfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
struct inode *inode;
- inode = ntfs_create_inode(mnt_userns, dir, dentry, NULL, S_IFDIR | mode,
+ inode = ntfs_create_inode(idmap, dir, dentry, NULL, S_IFDIR | mode,
0, NULL, 0, NULL);
return IS_ERR(inode) ? PTR_ERR(inode) : 0;
@@ -229,7 +229,7 @@ static int ntfs_rmdir(struct inode *dir, struct dentry *dentry)
/*
* ntfs_rename - inode_operations::rename
*/
-static int ntfs_rename(struct user_namespace *mnt_userns, struct inode *dir,
+static int ntfs_rename(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, struct inode *new_dir,
struct dentry *new_dentry, u32 flags)
{
@@ -415,13 +415,13 @@ static int ntfs_atomic_open(struct inode *dir, struct dentry *dentry,
/*
* Unfortunately I don't know how to get here correct 'struct nameidata *nd'
- * or 'struct user_namespace *mnt_userns'.
+ * or 'struct mnt_idmap *idmap'.
* See atomic_open in fs/namei.c.
* This is why xfstest/633 failed.
- * Looks like ntfs_atomic_open must accept 'struct user_namespace *mnt_userns' as argument.
+ * Looks like ntfs_atomic_open must accept 'struct mnt_idmap *idmap' as argument.
*/
- inode = ntfs_create_inode(&init_user_ns, dir, dentry, uni, mode, 0,
+ inode = ntfs_create_inode(&nop_mnt_idmap, dir, dentry, uni, mode, 0,
NULL, 0, fnd);
err = IS_ERR(inode) ? PTR_ERR(inode)
: finish_open(file, dentry, ntfs_file_open);
diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h
index 0e051c5595a2..80072e5f96f7 100644
--- a/fs/ntfs3/ntfs_fs.h
+++ b/fs/ntfs3/ntfs_fs.h
@@ -492,10 +492,12 @@ bool dir_is_empty(struct inode *dir);
extern const struct file_operations ntfs_dir_operations;
/* Globals from file.c */
-int ntfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, u32 flags);
-int ntfs3_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int ntfs3_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr);
+void ntfs_sparse_cluster(struct inode *inode, struct page *page0, CLST vcn,
+ CLST len);
int ntfs_file_open(struct inode *inode, struct file *file);
int ntfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len);
@@ -706,7 +708,7 @@ int ntfs_sync_inode(struct inode *inode);
int ntfs_flush_inodes(struct super_block *sb, struct inode *i1,
struct inode *i2);
int inode_write_data(struct inode *inode, const void *data, size_t bytes);
-struct inode *ntfs_create_inode(struct user_namespace *mnt_userns,
+struct inode *ntfs_create_inode(struct mnt_idmap *idmap,
struct inode *dir, struct dentry *dentry,
const struct cpu_str *uni, umode_t mode,
dev_t dev, const char *symname, u32 size,
@@ -857,17 +859,17 @@ unsigned long ntfs_names_hash(const u16 *name, size_t len, const u16 *upcase,
/* globals from xattr.c */
#ifdef CONFIG_NTFS3_FS_POSIX_ACL
struct posix_acl *ntfs_get_acl(struct inode *inode, int type, bool rcu);
-int ntfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+int ntfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type);
-int ntfs_init_acl(struct user_namespace *mnt_userns, struct inode *inode,
+int ntfs_init_acl(struct mnt_idmap *idmap, struct inode *inode,
struct inode *dir);
#else
#define ntfs_get_acl NULL
#define ntfs_set_acl NULL
#endif
-int ntfs_acl_chmod(struct user_namespace *mnt_userns, struct dentry *dentry);
-int ntfs_permission(struct user_namespace *mnt_userns, struct inode *inode,
+int ntfs_acl_chmod(struct mnt_idmap *idmap, struct dentry *dentry);
+int ntfs_permission(struct mnt_idmap *idmap, struct inode *inode,
int mask);
ssize_t ntfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
extern const struct xattr_handler *ntfs_xattr_handlers[];
diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c
index 616df209feea..ff64302e87e5 100644
--- a/fs/ntfs3/xattr.c
+++ b/fs/ntfs3/xattr.c
@@ -578,7 +578,7 @@ struct posix_acl *ntfs_get_acl(struct inode *inode, int type, bool rcu)
return ntfs_get_acl_ex(inode, type, 0);
}
-static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns,
+static noinline int ntfs_set_acl_ex(struct mnt_idmap *idmap,
struct inode *inode, struct posix_acl *acl,
int type, bool init_acl)
{
@@ -597,7 +597,7 @@ static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns,
case ACL_TYPE_ACCESS:
/* Do not change i_mode if we are in init_acl */
if (acl && !init_acl) {
- err = posix_acl_update_mode(mnt_userns, inode, &mode,
+ err = posix_acl_update_mode(idmap, inode, &mode,
&acl);
if (err)
return err;
@@ -652,10 +652,10 @@ out:
/*
* ntfs_set_acl - inode_operations::set_acl
*/
-int ntfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+int ntfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type)
{
- return ntfs_set_acl_ex(mnt_userns, d_inode(dentry), acl, type, false);
+ return ntfs_set_acl_ex(idmap, d_inode(dentry), acl, type, false);
}
/*
@@ -663,7 +663,7 @@ int ntfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
*
* Called from ntfs_create_inode().
*/
-int ntfs_init_acl(struct user_namespace *mnt_userns, struct inode *inode,
+int ntfs_init_acl(struct mnt_idmap *idmap, struct inode *inode,
struct inode *dir)
{
struct posix_acl *default_acl, *acl;
@@ -674,7 +674,7 @@ int ntfs_init_acl(struct user_namespace *mnt_userns, struct inode *inode,
return err;
if (default_acl) {
- err = ntfs_set_acl_ex(mnt_userns, inode, default_acl,
+ err = ntfs_set_acl_ex(idmap, inode, default_acl,
ACL_TYPE_DEFAULT, true);
posix_acl_release(default_acl);
} else {
@@ -683,7 +683,7 @@ int ntfs_init_acl(struct user_namespace *mnt_userns, struct inode *inode,
if (acl) {
if (!err)
- err = ntfs_set_acl_ex(mnt_userns, inode, acl,
+ err = ntfs_set_acl_ex(idmap, inode, acl,
ACL_TYPE_ACCESS, true);
posix_acl_release(acl);
} else {
@@ -697,7 +697,7 @@ int ntfs_init_acl(struct user_namespace *mnt_userns, struct inode *inode,
/*
* ntfs_acl_chmod - Helper for ntfs3_setattr().
*/
-int ntfs_acl_chmod(struct user_namespace *mnt_userns, struct dentry *dentry)
+int ntfs_acl_chmod(struct mnt_idmap *idmap, struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
struct super_block *sb = inode->i_sb;
@@ -708,13 +708,13 @@ int ntfs_acl_chmod(struct user_namespace *mnt_userns, struct dentry *dentry)
if (S_ISLNK(inode->i_mode))
return -EOPNOTSUPP;
- return posix_acl_chmod(mnt_userns, dentry, inode->i_mode);
+ return posix_acl_chmod(idmap, dentry, inode->i_mode);
}
/*
* ntfs_permission - inode_operations::permission
*/
-int ntfs_permission(struct user_namespace *mnt_userns, struct inode *inode,
+int ntfs_permission(struct mnt_idmap *idmap, struct inode *inode,
int mask)
{
if (ntfs_sb(inode->i_sb)->options->noacsrules) {
@@ -722,7 +722,7 @@ int ntfs_permission(struct user_namespace *mnt_userns, struct inode *inode,
return 0;
}
- return generic_permission(mnt_userns, inode, mask);
+ return generic_permission(idmap, inode, mask);
}
/*
@@ -835,7 +835,7 @@ out:
* ntfs_setxattr - inode_operations::setxattr
*/
static noinline int ntfs_setxattr(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *de, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 9f19cf9a5a9f..9fd03eaf15f8 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -260,7 +260,7 @@ static int ocfs2_set_acl(handle_t *handle,
return ret;
}
-int ocfs2_iop_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+int ocfs2_iop_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type)
{
struct buffer_head *bh = NULL;
@@ -274,7 +274,7 @@ int ocfs2_iop_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
if (type == ACL_TYPE_ACCESS && acl) {
umode_t mode;
- status = posix_acl_update_mode(&init_user_ns, inode, &mode,
+ status = posix_acl_update_mode(&nop_mnt_idmap, inode, &mode,
&acl);
if (status)
goto unlock;
diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h
index a897c4e41b26..667c6f03fa60 100644
--- a/fs/ocfs2/acl.h
+++ b/fs/ocfs2/acl.h
@@ -17,7 +17,7 @@ struct ocfs2_acl_entry {
};
struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type, bool rcu);
-int ocfs2_iop_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+int ocfs2_iop_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type);
extern int ocfs2_acl_chmod(struct inode *, struct buffer_head *);
extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 8b2020f92b5f..ba26c5567cff 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -188,18 +188,18 @@ static int dlmfs_file_release(struct inode *inode,
* We do ->setattr() just to override size changes. Our size is the size
* of the LVB and nothing else.
*/
-static int dlmfs_file_setattr(struct user_namespace *mnt_userns,
+static int dlmfs_file_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *attr)
{
int error;
struct inode *inode = d_inode(dentry);
attr->ia_valid &= ~ATTR_SIZE;
- error = setattr_prepare(&init_user_ns, dentry, attr);
+ error = setattr_prepare(&nop_mnt_idmap, dentry, attr);
if (error)
return error;
- setattr_copy(&init_user_ns, inode, attr);
+ setattr_copy(&nop_mnt_idmap, inode, attr);
mark_inode_dirty(inode);
return 0;
}
@@ -336,7 +336,7 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb)
if (inode) {
inode->i_ino = get_next_ino();
- inode_init_owner(&init_user_ns, inode, NULL, mode);
+ inode_init_owner(&nop_mnt_idmap, inode, NULL, mode);
inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
inc_nlink(inode);
@@ -359,7 +359,7 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
return NULL;
inode->i_ino = get_next_ino();
- inode_init_owner(&init_user_ns, inode, parent, mode);
+ inode_init_owner(&nop_mnt_idmap, inode, parent, mode);
inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
ip = DLMFS_I(inode);
@@ -402,7 +402,7 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
* File creation. Allocate an inode, and we're done..
*/
/* SMP-safe */
-static int dlmfs_mkdir(struct user_namespace * mnt_userns,
+static int dlmfs_mkdir(struct mnt_idmap * idmap,
struct inode * dir,
struct dentry * dentry,
umode_t mode)
@@ -451,7 +451,7 @@ bail:
return status;
}
-static int dlmfs_create(struct user_namespace *mnt_userns,
+static int dlmfs_create(struct mnt_idmap *idmap,
struct inode *dir,
struct dentry *dentry,
umode_t mode,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 5c60b6bc85bf..efb09de4343d 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1111,7 +1111,7 @@ out:
return ret;
}
-int ocfs2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int ocfs2_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
int status = 0, size_change;
@@ -1142,11 +1142,11 @@ int ocfs2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
if (!(attr->ia_valid & OCFS2_VALID_ATTRS))
return 0;
- status = setattr_prepare(&init_user_ns, dentry, attr);
+ status = setattr_prepare(&nop_mnt_idmap, dentry, attr);
if (status)
return status;
- if (is_quota_modification(mnt_userns, inode, attr)) {
+ if (is_quota_modification(&nop_mnt_idmap, inode, attr)) {
status = dquot_initialize(inode);
if (status)
return status;
@@ -1265,7 +1265,7 @@ int ocfs2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
}
}
- setattr_copy(&init_user_ns, inode, attr);
+ setattr_copy(&nop_mnt_idmap, inode, attr);
mark_inode_dirty(inode);
status = ocfs2_mark_inode_dirty(handle, inode, bh);
@@ -1302,7 +1302,7 @@ bail:
return status;
}
-int ocfs2_getattr(struct user_namespace *mnt_userns, const struct path *path,
+int ocfs2_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int flags)
{
struct inode *inode = d_inode(path->dentry);
@@ -1317,7 +1317,7 @@ int ocfs2_getattr(struct user_namespace *mnt_userns, const struct path *path,
goto bail;
}
- generic_fillattr(&init_user_ns, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, inode, stat);
/*
* If there is inline data in the inode, the inode will normally not
* have data blocks allocated (it may have an external xattr block).
@@ -1334,7 +1334,7 @@ bail:
return err;
}
-int ocfs2_permission(struct user_namespace *mnt_userns, struct inode *inode,
+int ocfs2_permission(struct mnt_idmap *idmap, struct inode *inode,
int mask)
{
int ret, had_lock;
@@ -1360,7 +1360,7 @@ int ocfs2_permission(struct user_namespace *mnt_userns, struct inode *inode,
dump_stack();
}
- ret = generic_permission(&init_user_ns, inode, mask);
+ ret = generic_permission(&nop_mnt_idmap, inode, mask);
ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock);
out:
@@ -1991,7 +1991,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
}
}
- if (file && setattr_should_drop_suidgid(&init_user_ns, file_inode(file))) {
+ if (file && setattr_should_drop_suidgid(&nop_mnt_idmap, file_inode(file))) {
ret = __ocfs2_write_remove_suid(inode, di_bh);
if (ret) {
mlog_errno(ret);
@@ -2279,7 +2279,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
* inode. There's also the dinode i_size state which
* can be lost via setattr during extending writes (we
* set inode->i_size at the end of a write. */
- if (setattr_should_drop_suidgid(&init_user_ns, inode)) {
+ if (setattr_should_drop_suidgid(&nop_mnt_idmap, inode)) {
if (meta_level == 0) {
ocfs2_inode_unlock_for_extent_tree(inode,
&di_bh,
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 71db8f3aa027..8e53e4ac1120 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -49,11 +49,11 @@ int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
u64 new_i_size, u64 zero_to);
int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
loff_t zero_to);
-int ocfs2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int ocfs2_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr);
-int ocfs2_getattr(struct user_namespace *mnt_userns, const struct path *path,
+int ocfs2_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int flags);
-int ocfs2_permission(struct user_namespace *mnt_userns,
+int ocfs2_permission(struct mnt_idmap *idmap,
struct inode *inode,
int mask);
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index afd54ec66103..811a6ea374bb 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -82,7 +82,7 @@ int ocfs2_fileattr_get(struct dentry *dentry, struct fileattr *fa)
return status;
}
-int ocfs2_fileattr_set(struct user_namespace *mnt_userns,
+int ocfs2_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa)
{
struct inode *inode = d_inode(dentry);
diff --git a/fs/ocfs2/ioctl.h b/fs/ocfs2/ioctl.h
index 0297c8846945..48a5fdfe87a1 100644
--- a/fs/ocfs2/ioctl.h
+++ b/fs/ocfs2/ioctl.h
@@ -12,7 +12,7 @@
#define OCFS2_IOCTL_PROTO_H
int ocfs2_fileattr_get(struct dentry *dentry, struct fileattr *fa);
-int ocfs2_fileattr_set(struct user_namespace *mnt_userns,
+int ocfs2_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa);
long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg);
diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c
index 73a3854b2afb..f37174e79fad 100644
--- a/fs/ocfs2/locks.c
+++ b/fs/ocfs2/locks.c
@@ -8,6 +8,7 @@
*/
#include <linux/fs.h>
+#include <linux/filelock.h>
#include <linux/fcntl.h>
#include <cluster/masklog.h>
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index a8fd51afb794..9175dbc47201 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -197,8 +197,8 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, umode_t mode)
* callers. */
if (S_ISDIR(mode))
set_nlink(inode, 2);
- mode = mode_strip_sgid(&init_user_ns, dir, mode);
- inode_init_owner(&init_user_ns, inode, dir, mode);
+ mode = mode_strip_sgid(&nop_mnt_idmap, dir, mode);
+ inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
status = dquot_initialize(inode);
if (status)
return ERR_PTR(status);
@@ -221,7 +221,7 @@ static void ocfs2_cleanup_add_entry_failure(struct ocfs2_super *osb,
iput(inode);
}
-static int ocfs2_mknod(struct user_namespace *mnt_userns,
+static int ocfs2_mknod(struct mnt_idmap *idmap,
struct inode *dir,
struct dentry *dentry,
umode_t mode,
@@ -642,7 +642,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
fe_blkno, suballoc_loc, suballoc_bit);
}
-static int ocfs2_mkdir(struct user_namespace *mnt_userns,
+static int ocfs2_mkdir(struct mnt_idmap *idmap,
struct inode *dir,
struct dentry *dentry,
umode_t mode)
@@ -651,14 +651,14 @@ static int ocfs2_mkdir(struct user_namespace *mnt_userns,
trace_ocfs2_mkdir(dir, dentry, dentry->d_name.len, dentry->d_name.name,
OCFS2_I(dir)->ip_blkno, mode);
- ret = ocfs2_mknod(&init_user_ns, dir, dentry, mode | S_IFDIR, 0);
+ ret = ocfs2_mknod(&nop_mnt_idmap, dir, dentry, mode | S_IFDIR, 0);
if (ret)
mlog_errno(ret);
return ret;
}
-static int ocfs2_create(struct user_namespace *mnt_userns,
+static int ocfs2_create(struct mnt_idmap *idmap,
struct inode *dir,
struct dentry *dentry,
umode_t mode,
@@ -668,7 +668,7 @@ static int ocfs2_create(struct user_namespace *mnt_userns,
trace_ocfs2_create(dir, dentry, dentry->d_name.len, dentry->d_name.name,
(unsigned long long)OCFS2_I(dir)->ip_blkno, mode);
- ret = ocfs2_mknod(&init_user_ns, dir, dentry, mode | S_IFREG, 0);
+ ret = ocfs2_mknod(&nop_mnt_idmap, dir, dentry, mode | S_IFREG, 0);
if (ret)
mlog_errno(ret);
@@ -1194,7 +1194,7 @@ static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2)
ocfs2_inode_unlock(inode2, 1);
}
-static int ocfs2_rename(struct user_namespace *mnt_userns,
+static int ocfs2_rename(struct mnt_idmap *idmap,
struct inode *old_dir,
struct dentry *old_dentry,
struct inode *new_dir,
@@ -1784,7 +1784,7 @@ bail:
return status;
}
-static int ocfs2_symlink(struct user_namespace *mnt_userns,
+static int ocfs2_symlink(struct mnt_idmap *idmap,
struct inode *dir,
struct dentry *dentry,
const char *symname)
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 623db358b1ef..5a656dc683f1 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4316,7 +4316,7 @@ static inline int ocfs2_may_create(struct inode *dir, struct dentry *child)
return -EEXIST;
if (IS_DEADDIR(dir))
return -ENOENT;
- return inode_permission(&init_user_ns, dir, MAY_WRITE | MAY_EXEC);
+ return inode_permission(&nop_mnt_idmap, dir, MAY_WRITE | MAY_EXEC);
}
/**
@@ -4370,7 +4370,7 @@ static int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir,
* file.
*/
if (!preserve) {
- error = inode_permission(&init_user_ns, inode, MAY_READ);
+ error = inode_permission(&nop_mnt_idmap, inode, MAY_READ);
if (error)
return error;
}
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 64e6ddcfe329..05d4414d0c33 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -9,6 +9,7 @@
#include <linux/module.h>
#include <linux/fs.h>
+#include <linux/filelock.h>
#include <linux/miscdevice.h>
#include <linux/mutex.h>
#include <linux/slab.h>
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 95d0611c5fc7..389308efe854 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -7247,7 +7247,7 @@ static int ocfs2_xattr_security_get(const struct xattr_handler *handler,
}
static int ocfs2_xattr_security_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
@@ -7320,7 +7320,7 @@ static int ocfs2_xattr_trusted_get(const struct xattr_handler *handler,
}
static int ocfs2_xattr_trusted_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
@@ -7351,7 +7351,7 @@ static int ocfs2_xattr_user_get(const struct xattr_handler *handler,
}
static int ocfs2_xattr_user_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index c219f91f44e9..82cf7e9a665f 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -279,13 +279,13 @@ out_free_inode:
return err;
}
-static int omfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int omfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
return omfs_add_node(dir, dentry, mode | S_IFDIR);
}
-static int omfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int omfs_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
return omfs_add_node(dir, dentry, mode | S_IFREG);
@@ -370,7 +370,7 @@ static bool omfs_fill_chain(struct inode *dir, struct dir_context *ctx,
return true;
}
-static int omfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+static int omfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags)
{
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 3a5b4b88a583..0101f1f87b56 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -337,13 +337,13 @@ const struct file_operations omfs_file_operations = {
.splice_read = generic_file_splice_read,
};
-static int omfs_setattr(struct user_namespace *mnt_userns,
+static int omfs_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
int error;
- error = setattr_prepare(&init_user_ns, dentry, attr);
+ error = setattr_prepare(&nop_mnt_idmap, dentry, attr);
if (error)
return error;
@@ -356,7 +356,7 @@ static int omfs_setattr(struct user_namespace *mnt_userns,
omfs_truncate(inode);
}
- setattr_copy(&init_user_ns, inode, attr);
+ setattr_copy(&nop_mnt_idmap, inode, attr);
mark_inode_dirty(inode);
return 0;
}
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index 2a0e83236c01..c4c79e07efc7 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -48,7 +48,7 @@ struct inode *omfs_new_inode(struct inode *dir, umode_t mode)
goto fail;
inode->i_ino = new_block;
- inode_init_owner(&init_user_ns, inode, NULL, mode);
+ inode_init_owner(&nop_mnt_idmap, inode, NULL, mode);
inode->i_mapping->a_ops = &omfs_aops;
inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
diff --git a/fs/open.c b/fs/open.c
index 82c1a28b3308..8038cf652583 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -33,10 +33,11 @@
#include <linux/dnotify.h>
#include <linux/compat.h>
#include <linux/mnt_idmapping.h>
+#include <linux/filelock.h>
#include "internal.h"
-int do_truncate(struct user_namespace *mnt_userns, struct dentry *dentry,
+int do_truncate(struct mnt_idmap *idmap, struct dentry *dentry,
loff_t length, unsigned int time_attrs, struct file *filp)
{
int ret;
@@ -54,7 +55,7 @@ int do_truncate(struct user_namespace *mnt_userns, struct dentry *dentry,
}
/* Remove suid, sgid, and file capabilities on truncate too */
- ret = dentry_needs_remove_privs(mnt_userns, dentry);
+ ret = dentry_needs_remove_privs(idmap, dentry);
if (ret < 0)
return ret;
if (ret)
@@ -62,14 +63,14 @@ int do_truncate(struct user_namespace *mnt_userns, struct dentry *dentry,
inode_lock(dentry->d_inode);
/* Note any delegations or leases have already been broken: */
- ret = notify_change(mnt_userns, dentry, &newattrs, NULL);
+ ret = notify_change(idmap, dentry, &newattrs, NULL);
inode_unlock(dentry->d_inode);
return ret;
}
long vfs_truncate(const struct path *path, loff_t length)
{
- struct user_namespace *mnt_userns;
+ struct mnt_idmap *idmap;
struct inode *inode;
long error;
@@ -85,8 +86,8 @@ long vfs_truncate(const struct path *path, loff_t length)
if (error)
goto out;
- mnt_userns = mnt_user_ns(path->mnt);
- error = inode_permission(mnt_userns, inode, MAY_WRITE);
+ idmap = mnt_idmap(path->mnt);
+ error = inode_permission(idmap, inode, MAY_WRITE);
if (error)
goto mnt_drop_write_and_out;
@@ -108,7 +109,7 @@ long vfs_truncate(const struct path *path, loff_t length)
error = security_path_truncate(path);
if (!error)
- error = do_truncate(mnt_userns, path->dentry, length, 0, NULL);
+ error = do_truncate(idmap, path->dentry, length, 0, NULL);
put_write_and_out:
put_write_access(inode);
@@ -190,7 +191,7 @@ long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
sb_start_write(inode->i_sb);
error = security_file_truncate(f.file);
if (!error)
- error = do_truncate(file_mnt_user_ns(f.file), dentry, length,
+ error = do_truncate(file_mnt_idmap(f.file), dentry, length,
ATTR_MTIME | ATTR_CTIME, f.file);
sb_end_write(inode->i_sb);
out_putf:
@@ -459,7 +460,7 @@ retry:
goto out_path_release;
}
- res = inode_permission(mnt_user_ns(path.mnt), inode, mode | MAY_ACCESS);
+ res = inode_permission(mnt_idmap(path.mnt), inode, mode | MAY_ACCESS);
/* SuS v2 requires we report a read only fs too */
if (res || !(mode & S_IWOTH) || special_file(inode->i_mode))
goto out_path_release;
@@ -603,7 +604,7 @@ retry_deleg:
goto out_unlock;
newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
- error = notify_change(mnt_user_ns(path->mnt), path->dentry,
+ error = notify_change(mnt_idmap(path->mnt), path->dentry,
&newattrs, &delegated_inode);
out_unlock:
inode_unlock(inode);
@@ -701,7 +702,8 @@ static inline bool setattr_vfsgid(struct iattr *attr, kgid_t kgid)
int chown_common(const struct path *path, uid_t user, gid_t group)
{
- struct user_namespace *mnt_userns, *fs_userns;
+ struct mnt_idmap *idmap;
+ struct user_namespace *fs_userns;
struct inode *inode = path->dentry->d_inode;
struct inode *delegated_inode = NULL;
int error;
@@ -712,7 +714,7 @@ int chown_common(const struct path *path, uid_t user, gid_t group)
uid = make_kuid(current_user_ns(), user);
gid = make_kgid(current_user_ns(), group);
- mnt_userns = mnt_user_ns(path->mnt);
+ idmap = mnt_idmap(path->mnt);
fs_userns = i_user_ns(inode);
retry_deleg:
@@ -726,14 +728,14 @@ retry_deleg:
inode_lock(inode);
if (!S_ISDIR(inode->i_mode))
newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_PRIV |
- setattr_should_drop_sgid(mnt_userns, inode);
+ setattr_should_drop_sgid(idmap, inode);
/* Continue to send actual fs values, not the mount values. */
error = security_path_chown(
path,
- from_vfsuid(mnt_userns, fs_userns, newattrs.ia_vfsuid),
- from_vfsgid(mnt_userns, fs_userns, newattrs.ia_vfsgid));
+ from_vfsuid(idmap, fs_userns, newattrs.ia_vfsuid),
+ from_vfsgid(idmap, fs_userns, newattrs.ia_vfsgid));
if (!error)
- error = notify_change(mnt_userns, path->dentry, &newattrs,
+ error = notify_change(idmap, path->dentry, &newattrs,
&delegated_inode);
inode_unlock(inode);
if (delegated_inode) {
@@ -870,7 +872,7 @@ static int do_dentry_open(struct file *f,
if (error)
goto cleanup_all;
- error = break_lease(locks_inode(f), f->f_flags);
+ error = break_lease(file_inode(f), f->f_flags);
if (error)
goto cleanup_all;
@@ -1064,7 +1066,7 @@ struct file *dentry_create(const struct path *path, int flags, umode_t mode,
if (IS_ERR(f))
return f;
- error = vfs_create(mnt_user_ns(path->mnt),
+ error = vfs_create(mnt_idmap(path->mnt),
d_inode(path->dentry->d_parent),
path->dentry, mode, true);
if (!error)
@@ -1411,8 +1413,9 @@ int filp_close(struct file *filp, fl_owner_t id)
{
int retval = 0;
- if (!file_count(filp)) {
- printk(KERN_ERR "VFS: Close: file count is 0\n");
+ if (CHECK_DATA_CORRUPTION(file_count(filp) == 0,
+ "VFS: Close: file count is 0 (f_op=%ps)",
+ filp->f_op)) {
return 0;
}
diff --git a/fs/orangefs/acl.c b/fs/orangefs/acl.c
index c5da2091cefb..5aefb705bcc8 100644
--- a/fs/orangefs/acl.c
+++ b/fs/orangefs/acl.c
@@ -118,7 +118,7 @@ out:
return error;
}
-int orangefs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+int orangefs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type)
{
int error;
@@ -136,7 +136,7 @@ int orangefs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
* and "mode" to the new desired value. It is up to
* us to propagate the new mode back to the server...
*/
- error = posix_acl_update_mode(&init_user_ns, inode,
+ error = posix_acl_update_mode(&nop_mnt_idmap, inode,
&iattr.ia_mode, &acl);
if (error) {
gossip_err("%s: posix_acl_update_mode err: %d\n",
diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c
index 167fa43b24f9..4ecb91a9bbeb 100644
--- a/fs/orangefs/file.c
+++ b/fs/orangefs/file.c
@@ -14,6 +14,7 @@
#include "orangefs-kernel.h"
#include "orangefs-bufmap.h"
#include <linux/fs.h>
+#include <linux/filelock.h>
#include <linux/pagemap.h>
static int flush_racache(struct inode *inode)
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index 4df560894386..11e21a0e65ce 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -822,7 +822,7 @@ again:
ORANGEFS_I(inode)->attr_uid = current_fsuid();
ORANGEFS_I(inode)->attr_gid = current_fsgid();
}
- setattr_copy(&init_user_ns, inode, iattr);
+ setattr_copy(&nop_mnt_idmap, inode, iattr);
spin_unlock(&inode->i_lock);
mark_inode_dirty(inode);
@@ -839,20 +839,20 @@ int __orangefs_setattr_mode(struct dentry *dentry, struct iattr *iattr)
ret = __orangefs_setattr(inode, iattr);
/* change mode on a file that has ACLs */
if (!ret && (iattr->ia_valid & ATTR_MODE))
- ret = posix_acl_chmod(&init_user_ns, dentry, inode->i_mode);
+ ret = posix_acl_chmod(&nop_mnt_idmap, dentry, inode->i_mode);
return ret;
}
/*
* Change attributes of an object referenced by dentry.
*/
-int orangefs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int orangefs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *iattr)
{
int ret;
gossip_debug(GOSSIP_INODE_DEBUG, "__orangefs_setattr: called on %pd\n",
dentry);
- ret = setattr_prepare(&init_user_ns, dentry, iattr);
+ ret = setattr_prepare(&nop_mnt_idmap, dentry, iattr);
if (ret)
goto out;
ret = __orangefs_setattr_mode(dentry, iattr);
@@ -866,7 +866,7 @@ out:
/*
* Obtain attributes of an object given a dentry
*/
-int orangefs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+int orangefs_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int flags)
{
int ret;
@@ -879,7 +879,7 @@ int orangefs_getattr(struct user_namespace *mnt_userns, const struct path *path,
ret = orangefs_inode_getattr(inode,
request_mask & STATX_SIZE ? ORANGEFS_GETATTR_SIZE : 0);
if (ret == 0) {
- generic_fillattr(&init_user_ns, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, inode, stat);
/* override block size reported to stat */
if (!(request_mask & STATX_SIZE))
@@ -890,7 +890,7 @@ int orangefs_getattr(struct user_namespace *mnt_userns, const struct path *path,
return ret;
}
-int orangefs_permission(struct user_namespace *mnt_userns,
+int orangefs_permission(struct mnt_idmap *idmap,
struct inode *inode, int mask)
{
int ret;
@@ -905,7 +905,7 @@ int orangefs_permission(struct user_namespace *mnt_userns,
if (ret < 0)
return ret;
- return generic_permission(&init_user_ns, inode, mask);
+ return generic_permission(&nop_mnt_idmap, inode, mask);
}
int orangefs_update_time(struct inode *inode, struct timespec64 *time, int flags)
@@ -944,7 +944,7 @@ static int orangefs_fileattr_get(struct dentry *dentry, struct fileattr *fa)
return 0;
}
-static int orangefs_fileattr_set(struct user_namespace *mnt_userns,
+static int orangefs_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa)
{
u64 val = 0;
diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c
index 75c1a3dcf68c..77518e248cf7 100644
--- a/fs/orangefs/namei.c
+++ b/fs/orangefs/namei.c
@@ -15,7 +15,7 @@
/*
* Get a newly allocated inode to go with a negative dentry.
*/
-static int orangefs_create(struct user_namespace *mnt_userns,
+static int orangefs_create(struct mnt_idmap *idmap,
struct inode *dir,
struct dentry *dentry,
umode_t mode,
@@ -216,7 +216,7 @@ static int orangefs_unlink(struct inode *dir, struct dentry *dentry)
return ret;
}
-static int orangefs_symlink(struct user_namespace *mnt_userns,
+static int orangefs_symlink(struct mnt_idmap *idmap,
struct inode *dir,
struct dentry *dentry,
const char *symname)
@@ -305,7 +305,7 @@ out:
return ret;
}
-static int orangefs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int orangefs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
struct orangefs_inode_s *parent = ORANGEFS_I(dir);
@@ -375,7 +375,7 @@ out:
return ret;
}
-static int orangefs_rename(struct user_namespace *mnt_userns,
+static int orangefs_rename(struct mnt_idmap *idmap,
struct inode *old_dir,
struct dentry *old_dentry,
struct inode *new_dir,
diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h
index 6e0cc01b3a14..ce20d3443869 100644
--- a/fs/orangefs/orangefs-kernel.h
+++ b/fs/orangefs/orangefs-kernel.h
@@ -106,7 +106,7 @@ enum orangefs_vfs_op_states {
extern const struct xattr_handler *orangefs_xattr_handlers[];
extern struct posix_acl *orangefs_get_acl(struct inode *inode, int type, bool rcu);
-extern int orangefs_set_acl(struct user_namespace *mnt_userns,
+extern int orangefs_set_acl(struct mnt_idmap *idmap,
struct dentry *dentry, struct posix_acl *acl,
int type);
int __orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
@@ -362,12 +362,12 @@ struct inode *orangefs_new_inode(struct super_block *sb,
int __orangefs_setattr(struct inode *, struct iattr *);
int __orangefs_setattr_mode(struct dentry *dentry, struct iattr *iattr);
-int orangefs_setattr(struct user_namespace *, struct dentry *, struct iattr *);
+int orangefs_setattr(struct mnt_idmap *, struct dentry *, struct iattr *);
-int orangefs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+int orangefs_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int flags);
-int orangefs_permission(struct user_namespace *mnt_userns,
+int orangefs_permission(struct mnt_idmap *idmap,
struct inode *inode, int mask);
int orangefs_update_time(struct inode *, struct timespec64 *, int);
diff --git a/fs/orangefs/xattr.c b/fs/orangefs/xattr.c
index 9a5b757fbd2f..6ecad4f94ae6 100644
--- a/fs/orangefs/xattr.c
+++ b/fs/orangefs/xattr.c
@@ -526,7 +526,7 @@ out_unlock:
}
static int orangefs_xattr_set_default(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused,
struct inode *inode,
const char *name,
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 6e4e65ee050d..c14e90764e35 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -792,7 +792,7 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
if (!c->metacopy && c->stat.size) {
err = ovl_copy_up_file(ofs, c->dentry, tmpfile, c->stat.size);
if (err)
- return err;
+ goto out_fput;
}
err = ovl_copy_up_metadata(c, temp);
@@ -1011,6 +1011,10 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
if (err)
return err;
+ if (!kuid_has_mapping(current_user_ns(), ctx.stat.uid) ||
+ !kgid_has_mapping(current_user_ns(), ctx.stat.gid))
+ return -EOVERFLOW;
+
ctx.metacopy = ovl_need_meta_copy_up(dentry, ctx.stat.mode, flags);
if (parent) {
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index f61e37f4c8ff..fc25fb95d5fc 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -641,7 +641,7 @@ static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
inode->i_state |= I_CREATING;
spin_unlock(&inode->i_lock);
- inode_init_owner(&init_user_ns, inode, dentry->d_parent->d_inode, mode);
+ inode_init_owner(&nop_mnt_idmap, inode, dentry->d_parent->d_inode, mode);
attr.mode = inode->i_mode;
err = ovl_create_or_link(dentry, inode, &attr, false);
@@ -655,19 +655,19 @@ out:
return err;
}
-static int ovl_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int ovl_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL);
}
-static int ovl_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int ovl_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL);
}
-static int ovl_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+static int ovl_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t rdev)
{
/* Don't allow creation of "whiteout" on overlay */
@@ -677,7 +677,7 @@ static int ovl_mknod(struct user_namespace *mnt_userns, struct inode *dir,
return ovl_create_object(dentry, mode, rdev, NULL);
}
-static int ovl_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+static int ovl_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *link)
{
return ovl_create_object(dentry, S_IFLNK, 0, link);
@@ -1075,7 +1075,7 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir)
return err;
}
-static int ovl_rename(struct user_namespace *mnt_userns, struct inode *olddir,
+static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
struct dentry *old, struct inode *newdir,
struct dentry *new, unsigned int flags)
{
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index a25bb3453dde..defd4e231ad2 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -392,8 +392,8 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected,
*/
take_dentry_name_snapshot(&name, real);
/*
- * No mnt_userns handling here: it's an internal lookup. Could skip
- * permission checking altogether, but for now just use non-mnt_userns
+ * No idmap handling here: it's an internal lookup. Could skip
+ * permission checking altogether, but for now just use non-idmap
* transformed ids.
*/
this = lookup_one_len(name.name.name, connected, name.name.len);
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index c9d0c362c7ef..7c04f033aadd 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -42,7 +42,7 @@ static struct file *ovl_open_realfile(const struct file *file,
{
struct inode *realinode = d_inode(realpath->dentry);
struct inode *inode = file_inode(file);
- struct user_namespace *real_mnt_userns;
+ struct mnt_idmap *real_idmap;
struct file *realfile;
const struct cred *old_cred;
int flags = file->f_flags | OVL_OPEN_FLAGS;
@@ -53,12 +53,12 @@ static struct file *ovl_open_realfile(const struct file *file,
acc_mode |= MAY_APPEND;
old_cred = ovl_override_creds(inode->i_sb);
- real_mnt_userns = mnt_user_ns(realpath->mnt);
- err = inode_permission(real_mnt_userns, realinode, MAY_OPEN | acc_mode);
+ real_idmap = mnt_idmap(realpath->mnt);
+ err = inode_permission(real_idmap, realinode, MAY_OPEN | acc_mode);
if (err) {
realfile = ERR_PTR(err);
} else {
- if (!inode_owner_or_capable(real_mnt_userns, realinode))
+ if (!inode_owner_or_capable(real_idmap, realinode))
flags &= ~O_NOATIME;
realfile = open_with_fake_path(&file->f_path, flags, realinode,
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index ee6dfa577c93..541cf3717fc2 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -19,7 +19,7 @@
#include "overlayfs.h"
-int ovl_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
int err;
@@ -28,7 +28,7 @@ int ovl_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
struct dentry *upperdentry;
const struct cred *old_cred;
- err = setattr_prepare(&init_user_ns, dentry, attr);
+ err = setattr_prepare(&nop_mnt_idmap, dentry, attr);
if (err)
return err;
@@ -153,7 +153,7 @@ static void ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid)
}
}
-int ovl_getattr(struct user_namespace *mnt_userns, const struct path *path,
+int ovl_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int flags)
{
struct dentry *dentry = path->dentry;
@@ -278,7 +278,7 @@ out:
return err;
}
-int ovl_permission(struct user_namespace *mnt_userns,
+int ovl_permission(struct mnt_idmap *idmap,
struct inode *inode, int mask)
{
struct inode *upperinode = ovl_inode_upper(inode);
@@ -298,7 +298,7 @@ int ovl_permission(struct user_namespace *mnt_userns,
* Check overlay inode with the creds of task and underlying inode
* with creds of mounter
*/
- err = generic_permission(&init_user_ns, inode, mask);
+ err = generic_permission(&nop_mnt_idmap, inode, mask);
if (err)
return err;
@@ -310,7 +310,7 @@ int ovl_permission(struct user_namespace *mnt_userns,
/* Make sure mounter can read file for copy up later */
mask |= MAY_READ;
}
- err = inode_permission(mnt_user_ns(realpath.mnt), realinode, mask);
+ err = inode_permission(mnt_idmap(realpath.mnt), realinode, mask);
revert_creds(old_cred);
return err;
@@ -361,7 +361,7 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
if (!value && !upperdentry) {
ovl_path_lower(dentry, &realpath);
old_cred = ovl_override_creds(dentry->d_sb);
- err = vfs_getxattr(mnt_user_ns(realpath.mnt), realdentry, name, NULL, 0);
+ err = vfs_getxattr(mnt_idmap(realpath.mnt), realdentry, name, NULL, 0);
revert_creds(old_cred);
if (err < 0)
goto out_drop_write;
@@ -403,7 +403,7 @@ int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
ovl_i_path_real(inode, &realpath);
old_cred = ovl_override_creds(dentry->d_sb);
- res = vfs_getxattr(mnt_user_ns(realpath.mnt), realpath.dentry, name, value, size);
+ res = vfs_getxattr(mnt_idmap(realpath.mnt), realpath.dentry, name, value, size);
revert_creds(old_cred);
return res;
}
@@ -463,7 +463,7 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
* alter the POSIX ACLs for the underlying filesystem.
*/
static void ovl_idmap_posix_acl(const struct inode *realinode,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct posix_acl *acl)
{
struct user_namespace *fs_userns = i_user_ns(realinode);
@@ -475,11 +475,11 @@ static void ovl_idmap_posix_acl(const struct inode *realinode,
struct posix_acl_entry *e = &acl->a_entries[i];
switch (e->e_tag) {
case ACL_USER:
- vfsuid = make_vfsuid(mnt_userns, fs_userns, e->e_uid);
+ vfsuid = make_vfsuid(idmap, fs_userns, e->e_uid);
e->e_uid = vfsuid_into_kuid(vfsuid);
break;
case ACL_GROUP:
- vfsgid = make_vfsgid(mnt_userns, fs_userns, e->e_gid);
+ vfsgid = make_vfsgid(idmap, fs_userns, e->e_gid);
e->e_gid = vfsgid_into_kgid(vfsgid);
break;
}
@@ -514,15 +514,15 @@ struct posix_acl *ovl_get_acl_path(const struct path *path,
const char *acl_name, bool noperm)
{
struct posix_acl *real_acl, *clone;
- struct user_namespace *mnt_userns;
+ struct mnt_idmap *idmap;
struct inode *realinode = d_inode(path->dentry);
- mnt_userns = mnt_user_ns(path->mnt);
+ idmap = mnt_idmap(path->mnt);
if (noperm)
real_acl = get_inode_acl(realinode, posix_acl_type(acl_name));
else
- real_acl = vfs_get_acl(mnt_userns, path->dentry, acl_name);
+ real_acl = vfs_get_acl(idmap, path->dentry, acl_name);
if (IS_ERR_OR_NULL(real_acl))
return real_acl;
@@ -540,7 +540,7 @@ struct posix_acl *ovl_get_acl_path(const struct path *path,
if (!clone)
return ERR_PTR(-ENOMEM);
- ovl_idmap_posix_acl(realinode, mnt_userns, clone);
+ ovl_idmap_posix_acl(realinode, idmap, clone);
return clone;
}
@@ -555,7 +555,7 @@ struct posix_acl *ovl_get_acl_path(const struct path *path,
*
* This is obviously only relevant when idmapped layers are used.
*/
-struct posix_acl *do_ovl_get_acl(struct user_namespace *mnt_userns,
+struct posix_acl *do_ovl_get_acl(struct mnt_idmap *idmap,
struct inode *inode, int type,
bool rcu, bool noperm)
{
@@ -618,7 +618,7 @@ static int ovl_set_or_remove_acl(struct dentry *dentry, struct inode *inode,
ovl_path_lower(dentry, &realpath);
old_cred = ovl_override_creds(dentry->d_sb);
- real_acl = vfs_get_acl(mnt_user_ns(realpath.mnt), realdentry,
+ real_acl = vfs_get_acl(mnt_idmap(realpath.mnt), realdentry,
acl_name);
revert_creds(old_cred);
if (IS_ERR(real_acl)) {
@@ -651,7 +651,7 @@ out_drop_write:
return err;
}
-int ovl_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+int ovl_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type)
{
int err;
@@ -665,7 +665,7 @@ int ovl_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
return -EOPNOTSUPP;
if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
return acl ? -EACCES : 0;
- if (!inode_owner_or_capable(&init_user_ns, inode))
+ if (!inode_owner_or_capable(&nop_mnt_idmap, inode))
return -EPERM;
/*
@@ -674,10 +674,10 @@ int ovl_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
*/
if (unlikely(inode->i_mode & S_ISGID) && type == ACL_TYPE_ACCESS &&
!in_group_p(inode->i_gid) &&
- !capable_wrt_inode_uidgid(&init_user_ns, inode, CAP_FSETID)) {
+ !capable_wrt_inode_uidgid(&nop_mnt_idmap, inode, CAP_FSETID)) {
struct iattr iattr = { .ia_valid = ATTR_KILL_SGID };
- err = ovl_setattr(&init_user_ns, dentry, &iattr);
+ err = ovl_setattr(&nop_mnt_idmap, dentry, &iattr);
if (err)
return err;
}
@@ -755,10 +755,10 @@ int ovl_real_fileattr_set(const struct path *realpath, struct fileattr *fa)
if (err)
return err;
- return vfs_fileattr_set(mnt_user_ns(realpath->mnt), realpath->dentry, fa);
+ return vfs_fileattr_set(mnt_idmap(realpath->mnt), realpath->dentry, fa);
}
-int ovl_fileattr_set(struct user_namespace *mnt_userns,
+int ovl_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa)
{
struct inode *inode = d_inode(dentry);
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 46753134533a..cfb3420b7df0 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -204,7 +204,7 @@ static struct dentry *ovl_lookup_positive_unlocked(struct ovl_lookup_data *d,
struct dentry *base, int len,
bool drop_negative)
{
- struct dentry *ret = lookup_one_unlocked(mnt_user_ns(d->mnt), name, base, len);
+ struct dentry *ret = lookup_one_unlocked(mnt_idmap(d->mnt), name, base, len);
if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
if (drop_negative && ret->d_lockref.count == 1) {
@@ -711,7 +711,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
if (err)
return ERR_PTR(err);
- index = lookup_one_positive_unlocked(ovl_upper_mnt_userns(ofs), name.name,
+ index = lookup_one_positive_unlocked(ovl_upper_mnt_idmap(ofs), name.name,
ofs->indexdir, name.len);
if (IS_ERR(index)) {
err = PTR_ERR(index);
@@ -1182,7 +1182,7 @@ bool ovl_lower_positive(struct dentry *dentry)
struct dentry *this;
struct dentry *lowerdir = poe->lowerstack[i].dentry;
- this = lookup_one_positive_unlocked(mnt_user_ns(poe->lowerstack[i].layer->mnt),
+ this = lookup_one_positive_unlocked(mnt_idmap(poe->lowerstack[i].layer->mnt),
name->name, lowerdir, name->len);
if (IS_ERR(this)) {
switch (PTR_ERR(this)) {
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 1df7f850ff3b..4d0b278f5630 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -141,13 +141,13 @@ static inline int ovl_do_notify_change(struct ovl_fs *ofs,
struct dentry *upperdentry,
struct iattr *attr)
{
- return notify_change(ovl_upper_mnt_userns(ofs), upperdentry, attr, NULL);
+ return notify_change(ovl_upper_mnt_idmap(ofs), upperdentry, attr, NULL);
}
static inline int ovl_do_rmdir(struct ovl_fs *ofs,
struct inode *dir, struct dentry *dentry)
{
- int err = vfs_rmdir(ovl_upper_mnt_userns(ofs), dir, dentry);
+ int err = vfs_rmdir(ovl_upper_mnt_idmap(ofs), dir, dentry);
pr_debug("rmdir(%pd2) = %i\n", dentry, err);
return err;
@@ -156,7 +156,7 @@ static inline int ovl_do_rmdir(struct ovl_fs *ofs,
static inline int ovl_do_unlink(struct ovl_fs *ofs, struct inode *dir,
struct dentry *dentry)
{
- int err = vfs_unlink(ovl_upper_mnt_userns(ofs), dir, dentry, NULL);
+ int err = vfs_unlink(ovl_upper_mnt_idmap(ofs), dir, dentry, NULL);
pr_debug("unlink(%pd2) = %i\n", dentry, err);
return err;
@@ -165,7 +165,8 @@ static inline int ovl_do_unlink(struct ovl_fs *ofs, struct inode *dir,
static inline int ovl_do_link(struct ovl_fs *ofs, struct dentry *old_dentry,
struct inode *dir, struct dentry *new_dentry)
{
- int err = vfs_link(old_dentry, ovl_upper_mnt_userns(ofs), dir, new_dentry, NULL);
+ int err = vfs_link(old_dentry, ovl_upper_mnt_idmap(ofs), dir,
+ new_dentry, NULL);
pr_debug("link(%pd2, %pd2) = %i\n", old_dentry, new_dentry, err);
return err;
@@ -175,7 +176,7 @@ static inline int ovl_do_create(struct ovl_fs *ofs,
struct inode *dir, struct dentry *dentry,
umode_t mode)
{
- int err = vfs_create(ovl_upper_mnt_userns(ofs), dir, dentry, mode, true);
+ int err = vfs_create(ovl_upper_mnt_idmap(ofs), dir, dentry, mode, true);
pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, err);
return err;
@@ -185,7 +186,7 @@ static inline int ovl_do_mkdir(struct ovl_fs *ofs,
struct inode *dir, struct dentry *dentry,
umode_t mode)
{
- int err = vfs_mkdir(ovl_upper_mnt_userns(ofs), dir, dentry, mode);
+ int err = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode);
pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err);
return err;
}
@@ -194,7 +195,7 @@ static inline int ovl_do_mknod(struct ovl_fs *ofs,
struct inode *dir, struct dentry *dentry,
umode_t mode, dev_t dev)
{
- int err = vfs_mknod(ovl_upper_mnt_userns(ofs), dir, dentry, mode, dev);
+ int err = vfs_mknod(ovl_upper_mnt_idmap(ofs), dir, dentry, mode, dev);
pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n", dentry, mode, dev, err);
return err;
@@ -204,7 +205,7 @@ static inline int ovl_do_symlink(struct ovl_fs *ofs,
struct inode *dir, struct dentry *dentry,
const char *oldname)
{
- int err = vfs_symlink(ovl_upper_mnt_userns(ofs), dir, dentry, oldname);
+ int err = vfs_symlink(ovl_upper_mnt_idmap(ofs), dir, dentry, oldname);
pr_debug("symlink(\"%s\", %pd2) = %i\n", oldname, dentry, err);
return err;
@@ -217,7 +218,7 @@ static inline ssize_t ovl_do_getxattr(const struct path *path, const char *name,
WARN_ON(path->dentry->d_sb != path->mnt->mnt_sb);
- err = vfs_getxattr(mnt_user_ns(path->mnt), path->dentry,
+ err = vfs_getxattr(mnt_idmap(path->mnt), path->dentry,
name, value, size);
len = (value && err > 0) ? err : 0;
@@ -251,7 +252,7 @@ static inline int ovl_do_setxattr(struct ovl_fs *ofs, struct dentry *dentry,
const char *name, const void *value,
size_t size, int flags)
{
- int err = vfs_setxattr(ovl_upper_mnt_userns(ofs), dentry, name,
+ int err = vfs_setxattr(ovl_upper_mnt_idmap(ofs), dentry, name,
value, size, flags);
pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, %d) = %i\n",
@@ -269,7 +270,7 @@ static inline int ovl_setxattr(struct ovl_fs *ofs, struct dentry *dentry,
static inline int ovl_do_removexattr(struct ovl_fs *ofs, struct dentry *dentry,
const char *name)
{
- int err = vfs_removexattr(ovl_upper_mnt_userns(ofs), dentry, name);
+ int err = vfs_removexattr(ovl_upper_mnt_idmap(ofs), dentry, name);
pr_debug("removexattr(%pd2, \"%s\") = %i\n", dentry, name, err);
return err;
}
@@ -283,13 +284,13 @@ static inline int ovl_removexattr(struct ovl_fs *ofs, struct dentry *dentry,
static inline int ovl_do_set_acl(struct ovl_fs *ofs, struct dentry *dentry,
const char *acl_name, struct posix_acl *acl)
{
- return vfs_set_acl(ovl_upper_mnt_userns(ofs), dentry, acl_name, acl);
+ return vfs_set_acl(ovl_upper_mnt_idmap(ofs), dentry, acl_name, acl);
}
static inline int ovl_do_remove_acl(struct ovl_fs *ofs, struct dentry *dentry,
const char *acl_name)
{
- return vfs_remove_acl(ovl_upper_mnt_userns(ofs), dentry, acl_name);
+ return vfs_remove_acl(ovl_upper_mnt_idmap(ofs), dentry, acl_name);
}
static inline int ovl_do_rename(struct ovl_fs *ofs, struct inode *olddir,
@@ -298,10 +299,10 @@ static inline int ovl_do_rename(struct ovl_fs *ofs, struct inode *olddir,
{
int err;
struct renamedata rd = {
- .old_mnt_userns = ovl_upper_mnt_userns(ofs),
+ .old_mnt_idmap = ovl_upper_mnt_idmap(ofs),
.old_dir = olddir,
.old_dentry = olddentry,
- .new_mnt_userns = ovl_upper_mnt_userns(ofs),
+ .new_mnt_idmap = ovl_upper_mnt_idmap(ofs),
.new_dir = newdir,
.new_dentry = newdentry,
.flags = flags,
@@ -319,7 +320,7 @@ static inline int ovl_do_rename(struct ovl_fs *ofs, struct inode *olddir,
static inline int ovl_do_whiteout(struct ovl_fs *ofs,
struct inode *dir, struct dentry *dentry)
{
- int err = vfs_whiteout(ovl_upper_mnt_userns(ofs), dir, dentry);
+ int err = vfs_whiteout(ovl_upper_mnt_idmap(ofs), dir, dentry);
pr_debug("whiteout(%pd2) = %i\n", dentry, err);
return err;
}
@@ -328,7 +329,7 @@ static inline struct file *ovl_do_tmpfile(struct ovl_fs *ofs,
struct dentry *dentry, umode_t mode)
{
struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = dentry };
- struct file *file = vfs_tmpfile_open(ovl_upper_mnt_userns(ofs), &path, mode,
+ struct file *file = vfs_tmpfile_open(ovl_upper_mnt_idmap(ofs), &path, mode,
O_LARGEFILE | O_WRONLY, current_cred());
int err = PTR_ERR_OR_ZERO(file);
@@ -340,7 +341,7 @@ static inline struct dentry *ovl_lookup_upper(struct ovl_fs *ofs,
const char *name,
struct dentry *base, int len)
{
- return lookup_one(ovl_upper_mnt_userns(ofs), name, base, len);
+ return lookup_one(ovl_upper_mnt_idmap(ofs), name, base, len);
}
static inline bool ovl_open_flags_need_copy_up(int flags)
@@ -596,11 +597,11 @@ int ovl_set_nlink_lower(struct dentry *dentry);
unsigned int ovl_get_nlink(struct ovl_fs *ofs, struct dentry *lowerdentry,
struct dentry *upperdentry,
unsigned int fallback);
-int ovl_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr);
-int ovl_getattr(struct user_namespace *mnt_userns, const struct path *path,
+int ovl_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int flags);
-int ovl_permission(struct user_namespace *mnt_userns, struct inode *inode,
+int ovl_permission(struct mnt_idmap *idmap, struct inode *inode,
int mask);
int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
const void *value, size_t size, int flags);
@@ -609,20 +610,20 @@ int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
#ifdef CONFIG_FS_POSIX_ACL
-struct posix_acl *do_ovl_get_acl(struct user_namespace *mnt_userns,
+struct posix_acl *do_ovl_get_acl(struct mnt_idmap *idmap,
struct inode *inode, int type,
bool rcu, bool noperm);
static inline struct posix_acl *ovl_get_inode_acl(struct inode *inode, int type,
bool rcu)
{
- return do_ovl_get_acl(&init_user_ns, inode, type, rcu, true);
+ return do_ovl_get_acl(&nop_mnt_idmap, inode, type, rcu, true);
}
-static inline struct posix_acl *ovl_get_acl(struct user_namespace *mnt_userns,
+static inline struct posix_acl *ovl_get_acl(struct mnt_idmap *idmap,
struct dentry *dentry, int type)
{
- return do_ovl_get_acl(mnt_userns, d_inode(dentry), type, false, false);
+ return do_ovl_get_acl(idmap, d_inode(dentry), type, false, false);
}
-int ovl_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+int ovl_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type);
struct posix_acl *ovl_get_acl_path(const struct path *path,
const char *acl_name, bool noperm);
@@ -717,7 +718,7 @@ void ovl_aio_request_cache_destroy(void);
int ovl_real_fileattr_get(const struct path *realpath, struct fileattr *fa);
int ovl_real_fileattr_set(const struct path *realpath, struct fileattr *fa);
int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa);
-int ovl_fileattr_set(struct user_namespace *mnt_userns,
+int ovl_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa);
/* copy_up.c */
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index e1af8f660698..fd11fe6d6d45 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -90,9 +90,9 @@ static inline struct vfsmount *ovl_upper_mnt(struct ovl_fs *ofs)
return ofs->layers[0].mnt;
}
-static inline struct user_namespace *ovl_upper_mnt_userns(struct ovl_fs *ofs)
+static inline struct mnt_idmap *ovl_upper_mnt_idmap(struct ovl_fs *ofs)
{
- return mnt_user_ns(ovl_upper_mnt(ofs));
+ return mnt_idmap(ovl_upper_mnt(ofs));
}
static inline struct ovl_fs *OVL_FS(struct super_block *sb)
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 8cd2b9947de1..b6952b21a7ee 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -278,7 +278,7 @@ static int ovl_check_whiteouts(const struct path *path, struct ovl_readdir_data
while (rdd->first_maybe_whiteout) {
p = rdd->first_maybe_whiteout;
rdd->first_maybe_whiteout = p->next_maybe_whiteout;
- dentry = lookup_one(mnt_user_ns(path->mnt), p->name, dir, p->len);
+ dentry = lookup_one(mnt_idmap(path->mnt), p->name, dir, p->len);
if (!IS_ERR(dentry)) {
p->is_whiteout = ovl_is_whiteout(dentry);
dput(dentry);
@@ -480,7 +480,7 @@ static int ovl_cache_update_ino(const struct path *path, struct ovl_cache_entry
goto get;
}
}
- this = lookup_one(mnt_user_ns(path->mnt), p->name, dir, p->len);
+ this = lookup_one(mnt_idmap(path->mnt), p->name, dir, p->len);
if (IS_ERR_OR_NULL(this) || !this->d_inode) {
/* Mark a stale entry */
p->is_whiteout = true;
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 85b891152a2c..f1d9f75f8786 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1012,7 +1012,7 @@ static int ovl_own_xattr_get(const struct xattr_handler *handler,
}
static int ovl_own_xattr_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *dentry, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
@@ -1028,7 +1028,7 @@ static int ovl_other_xattr_get(const struct xattr_handler *handler,
}
static int ovl_other_xattr_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *dentry, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index bde291623c8c..923d66d131c1 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -491,7 +491,7 @@ bool ovl_is_whiteout(struct dentry *dentry)
struct file *ovl_path_open(const struct path *path, int flags)
{
struct inode *inode = d_inode(path->dentry);
- struct user_namespace *real_mnt_userns = mnt_user_ns(path->mnt);
+ struct mnt_idmap *real_idmap = mnt_idmap(path->mnt);
int err, acc_mode;
if (flags & ~(O_ACCMODE | O_LARGEFILE))
@@ -508,12 +508,12 @@ struct file *ovl_path_open(const struct path *path, int flags)
BUG();
}
- err = inode_permission(real_mnt_userns, inode, acc_mode | MAY_OPEN);
+ err = inode_permission(real_idmap, inode, acc_mode | MAY_OPEN);
if (err)
return ERR_PTR(err);
/* O_NOATIME is an optimization, don't fail if not permitted */
- if (inode_owner_or_capable(real_mnt_userns, inode))
+ if (inode_owner_or_capable(real_idmap, inode))
flags |= O_NOATIME;
return dentry_open(path, flags, current_cred());
@@ -1101,16 +1101,16 @@ void ovl_copyattr(struct inode *inode)
{
struct path realpath;
struct inode *realinode;
- struct user_namespace *real_mnt_userns;
+ struct mnt_idmap *real_idmap;
vfsuid_t vfsuid;
vfsgid_t vfsgid;
ovl_i_path_real(inode, &realpath);
realinode = d_inode(realpath.dentry);
- real_mnt_userns = mnt_user_ns(realpath.mnt);
+ real_idmap = mnt_idmap(realpath.mnt);
- vfsuid = i_uid_into_vfsuid(real_mnt_userns, realinode);
- vfsgid = i_gid_into_vfsgid(real_mnt_userns, realinode);
+ vfsuid = i_uid_into_vfsuid(real_idmap, realinode);
+ vfsgid = i_gid_into_vfsgid(real_idmap, realinode);
inode->i_uid = vfsuid_into_kuid(vfsuid);
inode->i_gid = vfsgid_into_kgid(vfsgid);
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index d7bc81fc0840..5a76fb35923a 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -28,6 +28,7 @@
#include <linux/security.h>
#include <linux/evm.h>
#include <linux/fsnotify.h>
+#include <linux/filelock.h>
#include "internal.h"
@@ -111,7 +112,7 @@ void forget_all_cached_acls(struct inode *inode)
}
EXPORT_SYMBOL(forget_all_cached_acls);
-static struct posix_acl *__get_acl(struct user_namespace *mnt_userns,
+static struct posix_acl *__get_acl(struct mnt_idmap *idmap,
struct dentry *dentry, struct inode *inode,
int type)
{
@@ -154,7 +155,7 @@ static struct posix_acl *__get_acl(struct user_namespace *mnt_userns,
* we'll just create the negative cache entry.
*/
if (dentry && inode->i_op->get_acl) {
- acl = inode->i_op->get_acl(mnt_userns, dentry, type);
+ acl = inode->i_op->get_acl(idmap, dentry, type);
} else if (inode->i_op->get_inode_acl) {
acl = inode->i_op->get_inode_acl(inode, type, false);
} else {
@@ -174,14 +175,14 @@ static struct posix_acl *__get_acl(struct user_namespace *mnt_userns,
* Cache the result, but only if our sentinel is still in place.
*/
posix_acl_dup(acl);
- if (unlikely(cmpxchg(p, sentinel, acl) != sentinel))
+ if (unlikely(!try_cmpxchg(p, &sentinel, acl)))
posix_acl_release(acl);
return acl;
}
struct posix_acl *get_inode_acl(struct inode *inode, int type)
{
- return __get_acl(&init_user_ns, NULL, inode, type);
+ return __get_acl(&nop_mnt_idmap, NULL, inode, type);
}
EXPORT_SYMBOL(get_inode_acl);
@@ -372,7 +373,7 @@ EXPORT_SYMBOL(posix_acl_from_mode);
* by the acl. Returns -E... otherwise.
*/
int
-posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode,
+posix_acl_permission(struct mnt_idmap *idmap, struct inode *inode,
const struct posix_acl *acl, int want)
{
const struct posix_acl_entry *pa, *pe, *mask_obj;
@@ -387,18 +388,18 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode,
switch(pa->e_tag) {
case ACL_USER_OBJ:
/* (May have been checked already) */
- vfsuid = i_uid_into_vfsuid(mnt_userns, inode);
+ vfsuid = i_uid_into_vfsuid(idmap, inode);
if (vfsuid_eq_kuid(vfsuid, current_fsuid()))
goto check_perm;
break;
case ACL_USER:
- vfsuid = make_vfsuid(mnt_userns, fs_userns,
+ vfsuid = make_vfsuid(idmap, fs_userns,
pa->e_uid);
if (vfsuid_eq_kuid(vfsuid, current_fsuid()))
goto mask;
break;
case ACL_GROUP_OBJ:
- vfsgid = i_gid_into_vfsgid(mnt_userns, inode);
+ vfsgid = i_gid_into_vfsgid(idmap, inode);
if (vfsgid_in_group_p(vfsgid)) {
found = 1;
if ((pa->e_perm & want) == want)
@@ -406,7 +407,7 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode,
}
break;
case ACL_GROUP:
- vfsgid = make_vfsgid(mnt_userns, fs_userns,
+ vfsgid = make_vfsgid(idmap, fs_userns,
pa->e_gid);
if (vfsgid_in_group_p(vfsgid)) {
found = 1;
@@ -591,18 +592,18 @@ EXPORT_SYMBOL(__posix_acl_chmod);
/**
* posix_acl_chmod - chmod a posix acl
*
- * @mnt_userns: user namespace of the mount @inode was found from
+ * @idmap: idmap of the mount @inode was found from
* @dentry: dentry to check permissions on
* @mode: the new mode of @inode
*
- * If the dentry has been found through an idmapped mount the user namespace of
- * the vfsmount must be passed through @mnt_userns. This function will then
- * take care to map the inode according to @mnt_userns before checking
+ * If the dentry has been found through an idmapped mount the idmap of
+ * the vfsmount must be passed through @idmap. This function will then
+ * take care to map the inode according to @idmap before checking
* permissions. On non-idmapped mounts or if permission checking is to be
- * performed on the raw inode simply passs init_user_ns.
+ * performed on the raw inode simply passs @nop_mnt_idmap.
*/
int
- posix_acl_chmod(struct user_namespace *mnt_userns, struct dentry *dentry,
+ posix_acl_chmod(struct mnt_idmap *idmap, struct dentry *dentry,
umode_t mode)
{
struct inode *inode = d_inode(dentry);
@@ -624,7 +625,7 @@ int
ret = __posix_acl_chmod(&acl, GFP_KERNEL, mode);
if (ret)
return ret;
- ret = inode->i_op->set_acl(mnt_userns, dentry, acl, ACL_TYPE_ACCESS);
+ ret = inode->i_op->set_acl(idmap, dentry, acl, ACL_TYPE_ACCESS);
posix_acl_release(acl);
return ret;
}
@@ -683,7 +684,7 @@ EXPORT_SYMBOL_GPL(posix_acl_create);
/**
* posix_acl_update_mode - update mode in set_acl
- * @mnt_userns: user namespace of the mount @inode was found from
+ * @idmap: idmap of the mount @inode was found from
* @inode: target inode
* @mode_p: mode (pointer) for update
* @acl: acl pointer
@@ -695,15 +696,15 @@ EXPORT_SYMBOL_GPL(posix_acl_create);
* As with chmod, clear the setgid bit if the caller is not in the owning group
* or capable of CAP_FSETID (see inode_change_ok).
*
- * If the inode has been found through an idmapped mount the user namespace of
- * the vfsmount must be passed through @mnt_userns. This function will then
- * take care to map the inode according to @mnt_userns before checking
+ * If the inode has been found through an idmapped mount the idmap of
+ * the vfsmount must be passed through @idmap. This function will then
+ * take care to map the inode according to @idmap before checking
* permissions. On non-idmapped mounts or if permission checking is to be
- * performed on the raw inode simply passs init_user_ns.
+ * performed on the raw inode simply passs @nop_mnt_idmap.
*
* Called from set_acl inode operations.
*/
-int posix_acl_update_mode(struct user_namespace *mnt_userns,
+int posix_acl_update_mode(struct mnt_idmap *idmap,
struct inode *inode, umode_t *mode_p,
struct posix_acl **acl)
{
@@ -715,8 +716,8 @@ int posix_acl_update_mode(struct user_namespace *mnt_userns,
return error;
if (error == 0)
*acl = NULL;
- if (!vfsgid_in_group_p(i_gid_into_vfsgid(mnt_userns, inode)) &&
- !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
+ if (!vfsgid_in_group_p(i_gid_into_vfsgid(idmap, inode)) &&
+ !capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID))
mode &= ~S_ISGID;
*mode_p = mode;
return 0;
@@ -893,7 +894,6 @@ static ssize_t vfs_posix_acl_to_xattr(struct mnt_idmap *idmap,
struct posix_acl_xattr_header *ext_acl = buffer;
struct posix_acl_xattr_entry *ext_entry;
struct user_namespace *fs_userns, *caller_userns;
- struct user_namespace *mnt_userns;
ssize_t real_size, n;
vfsuid_t vfsuid;
vfsgid_t vfsgid;
@@ -909,19 +909,18 @@ static ssize_t vfs_posix_acl_to_xattr(struct mnt_idmap *idmap,
fs_userns = i_user_ns(inode);
caller_userns = current_user_ns();
- mnt_userns = mnt_idmap_owner(idmap);
for (n=0; n < acl->a_count; n++, ext_entry++) {
const struct posix_acl_entry *acl_e = &acl->a_entries[n];
ext_entry->e_tag = cpu_to_le16(acl_e->e_tag);
ext_entry->e_perm = cpu_to_le16(acl_e->e_perm);
switch(acl_e->e_tag) {
case ACL_USER:
- vfsuid = make_vfsuid(mnt_userns, fs_userns, acl_e->e_uid);
+ vfsuid = make_vfsuid(idmap, fs_userns, acl_e->e_uid);
ext_entry->e_id = cpu_to_le32(from_kuid(
caller_userns, vfsuid_into_kuid(vfsuid)));
break;
case ACL_GROUP:
- vfsgid = make_vfsgid(mnt_userns, fs_userns, acl_e->e_gid);
+ vfsgid = make_vfsgid(idmap, fs_userns, acl_e->e_gid);
ext_entry->e_id = cpu_to_le32(from_kgid(
caller_userns, vfsgid_into_kgid(vfsgid)));
break;
@@ -934,7 +933,7 @@ static ssize_t vfs_posix_acl_to_xattr(struct mnt_idmap *idmap,
}
int
-set_posix_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+set_posix_acl(struct mnt_idmap *idmap, struct dentry *dentry,
int type, struct posix_acl *acl)
{
struct inode *inode = d_inode(dentry);
@@ -946,7 +945,7 @@ set_posix_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
return acl ? -EACCES : 0;
- if (!inode_owner_or_capable(mnt_userns, inode))
+ if (!inode_owner_or_capable(idmap, inode))
return -EPERM;
if (acl) {
@@ -954,7 +953,7 @@ set_posix_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
if (ret)
return ret;
}
- return inode->i_op->set_acl(mnt_userns, dentry, acl, type);
+ return inode->i_op->set_acl(idmap, dentry, acl, type);
}
EXPORT_SYMBOL(set_posix_acl);
@@ -978,14 +977,14 @@ const struct xattr_handler posix_acl_default_xattr_handler = {
};
EXPORT_SYMBOL_GPL(posix_acl_default_xattr_handler);
-int simple_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+int simple_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type)
{
int error;
struct inode *inode = d_inode(dentry);
if (type == ACL_TYPE_ACCESS) {
- error = posix_acl_update_mode(mnt_userns, inode,
+ error = posix_acl_update_mode(idmap, inode,
&inode->i_mode, &acl);
if (error)
return error;
@@ -1017,7 +1016,7 @@ int simple_acl_create(struct inode *dir, struct inode *inode)
return 0;
}
-static int vfs_set_acl_idmapped_mnt(struct user_namespace *mnt_userns,
+static int vfs_set_acl_idmapped_mnt(struct mnt_idmap *idmap,
struct user_namespace *fs_userns,
struct posix_acl *acl)
{
@@ -1026,11 +1025,11 @@ static int vfs_set_acl_idmapped_mnt(struct user_namespace *mnt_userns,
switch (acl_e->e_tag) {
case ACL_USER:
- acl_e->e_uid = from_vfsuid(mnt_userns, fs_userns,
+ acl_e->e_uid = from_vfsuid(idmap, fs_userns,
VFSUIDT_INIT(acl_e->e_uid));
break;
case ACL_GROUP:
- acl_e->e_gid = from_vfsgid(mnt_userns, fs_userns,
+ acl_e->e_gid = from_vfsgid(idmap, fs_userns,
VFSGIDT_INIT(acl_e->e_gid));
break;
}
@@ -1041,7 +1040,7 @@ static int vfs_set_acl_idmapped_mnt(struct user_namespace *mnt_userns,
/**
* vfs_set_acl - set posix acls
- * @mnt_userns: user namespace of the mount
+ * @idmap: idmap of the mount
* @dentry: the dentry based on which to set the posix acls
* @acl_name: the name of the posix acl
* @kacl: the posix acls in the appropriate VFS format
@@ -1051,7 +1050,7 @@ static int vfs_set_acl_idmapped_mnt(struct user_namespace *mnt_userns,
*
* Return: On success 0, on error negative errno.
*/
-int vfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+int vfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
const char *acl_name, struct posix_acl *kacl)
{
int acl_type;
@@ -1071,7 +1070,7 @@ int vfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
* if this is a filesystem with a backing store - ultimately
* translate them to backing store values.
*/
- error = vfs_set_acl_idmapped_mnt(mnt_userns, i_user_ns(inode), kacl);
+ error = vfs_set_acl_idmapped_mnt(idmap, i_user_ns(inode), kacl);
if (error)
return error;
}
@@ -1083,11 +1082,11 @@ retry_deleg:
* We only care about restrictions the inode struct itself places upon
* us otherwise POSIX ACLs aren't subject to any VFS restrictions.
*/
- error = may_write_xattr(mnt_userns, inode);
+ error = may_write_xattr(idmap, inode);
if (error)
goto out_inode_unlock;
- error = security_inode_set_acl(mnt_userns, dentry, acl_name, kacl);
+ error = security_inode_set_acl(idmap, dentry, acl_name, kacl);
if (error)
goto out_inode_unlock;
@@ -1096,7 +1095,7 @@ retry_deleg:
goto out_inode_unlock;
if (inode->i_opflags & IOP_XATTR)
- error = set_posix_acl(mnt_userns, dentry, acl_type, kacl);
+ error = set_posix_acl(idmap, dentry, acl_type, kacl);
else if (unlikely(is_bad_inode(inode)))
error = -EIO;
else
@@ -1121,7 +1120,7 @@ EXPORT_SYMBOL_GPL(vfs_set_acl);
/**
* vfs_get_acl - get posix acls
- * @mnt_userns: user namespace of the mount
+ * @idmap: idmap of the mount
* @dentry: the dentry based on which to retrieve the posix acls
* @acl_name: the name of the posix acl
*
@@ -1130,7 +1129,7 @@ EXPORT_SYMBOL_GPL(vfs_set_acl);
*
* Return: On success POSIX ACLs in VFS format, on error negative errno.
*/
-struct posix_acl *vfs_get_acl(struct user_namespace *mnt_userns,
+struct posix_acl *vfs_get_acl(struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name)
{
struct inode *inode = d_inode(dentry);
@@ -1145,7 +1144,7 @@ struct posix_acl *vfs_get_acl(struct user_namespace *mnt_userns,
* The VFS has no restrictions on reading POSIX ACLs so calling
* something like xattr_permission() isn't needed. Only LSMs get a say.
*/
- error = security_inode_get_acl(mnt_userns, dentry, acl_name);
+ error = security_inode_get_acl(idmap, dentry, acl_name);
if (error)
return ERR_PTR(error);
@@ -1154,7 +1153,7 @@ struct posix_acl *vfs_get_acl(struct user_namespace *mnt_userns,
if (S_ISLNK(inode->i_mode))
return ERR_PTR(-EOPNOTSUPP);
- acl = __get_acl(mnt_userns, dentry, inode, acl_type);
+ acl = __get_acl(idmap, dentry, inode, acl_type);
if (IS_ERR(acl))
return acl;
if (!acl)
@@ -1166,7 +1165,7 @@ EXPORT_SYMBOL_GPL(vfs_get_acl);
/**
* vfs_remove_acl - remove posix acls
- * @mnt_userns: user namespace of the mount
+ * @idmap: idmap of the mount
* @dentry: the dentry based on which to retrieve the posix acls
* @acl_name: the name of the posix acl
*
@@ -1174,7 +1173,7 @@ EXPORT_SYMBOL_GPL(vfs_get_acl);
*
* Return: On success 0, on error negative errno.
*/
-int vfs_remove_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+int vfs_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry,
const char *acl_name)
{
int acl_type;
@@ -1193,11 +1192,11 @@ retry_deleg:
* We only care about restrictions the inode struct itself places upon
* us otherwise POSIX ACLs aren't subject to any VFS restrictions.
*/
- error = may_write_xattr(mnt_userns, inode);
+ error = may_write_xattr(idmap, inode);
if (error)
goto out_inode_unlock;
- error = security_inode_remove_acl(mnt_userns, dentry, acl_name);
+ error = security_inode_remove_acl(idmap, dentry, acl_name);
if (error)
goto out_inode_unlock;
@@ -1206,14 +1205,14 @@ retry_deleg:
goto out_inode_unlock;
if (inode->i_opflags & IOP_XATTR)
- error = set_posix_acl(mnt_userns, dentry, acl_type, NULL);
+ error = set_posix_acl(idmap, dentry, acl_type, NULL);
else if (unlikely(is_bad_inode(inode)))
error = -EIO;
else
error = -EOPNOTSUPP;
if (!error) {
fsnotify_xattr(dentry);
- evm_inode_post_remove_acl(mnt_userns, dentry, acl_name);
+ evm_inode_post_remove_acl(idmap, dentry, acl_name);
}
out_inode_unlock:
@@ -1245,7 +1244,7 @@ int do_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
return PTR_ERR(acl);
}
- error = vfs_set_acl(mnt_idmap_owner(idmap), dentry, acl_name, acl);
+ error = vfs_set_acl(idmap, dentry, acl_name, acl);
posix_acl_release(acl);
return error;
}
@@ -1256,7 +1255,7 @@ ssize_t do_get_acl(struct mnt_idmap *idmap, struct dentry *dentry,
ssize_t error;
struct posix_acl *acl;
- acl = vfs_get_acl(mnt_idmap_owner(idmap), dentry, acl_name);
+ acl = vfs_get_acl(idmap, dentry, acl_name);
if (IS_ERR(acl))
return PTR_ERR(acl);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9e479d7d202b..5e0e0ccd47aa 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -685,7 +685,7 @@ static bool proc_fd_access_allowed(struct inode *inode)
return allowed;
}
-int proc_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int proc_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
int error;
@@ -694,11 +694,11 @@ int proc_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
if (attr->ia_valid & ATTR_MODE)
return -EPERM;
- error = setattr_prepare(&init_user_ns, dentry, attr);
+ error = setattr_prepare(&nop_mnt_idmap, dentry, attr);
if (error)
return error;
- setattr_copy(&init_user_ns, inode, attr);
+ setattr_copy(&nop_mnt_idmap, inode, attr);
mark_inode_dirty(inode);
return 0;
}
@@ -727,7 +727,7 @@ static bool has_pid_permissions(struct proc_fs_info *fs_info,
}
-static int proc_pid_permission(struct user_namespace *mnt_userns,
+static int proc_pid_permission(struct mnt_idmap *idmap,
struct inode *inode, int mask)
{
struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
@@ -753,7 +753,7 @@ static int proc_pid_permission(struct user_namespace *mnt_userns,
return -EPERM;
}
- return generic_permission(&init_user_ns, inode, mask);
+ return generic_permission(&nop_mnt_idmap, inode, mask);
}
@@ -1959,14 +1959,14 @@ static struct inode *proc_pid_make_base_inode(struct super_block *sb,
return inode;
}
-int pid_getattr(struct user_namespace *mnt_userns, const struct path *path,
+int pid_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
struct task_struct *task;
- generic_fillattr(&init_user_ns, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, inode, stat);
stat->uid = GLOBAL_ROOT_UID;
stat->gid = GLOBAL_ROOT_GID;
@@ -3557,7 +3557,7 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
* This function makes sure that the node is always accessible for members of
* same thread group.
*/
-static int proc_tid_comm_permission(struct user_namespace *mnt_userns,
+static int proc_tid_comm_permission(struct mnt_idmap *idmap,
struct inode *inode, int mask)
{
bool is_same_tgroup;
@@ -3577,7 +3577,7 @@ static int proc_tid_comm_permission(struct user_namespace *mnt_userns,
return 0;
}
- return generic_permission(&init_user_ns, inode, mask);
+ return generic_permission(&nop_mnt_idmap, inode, mask);
}
static const struct inode_operations proc_tid_comm_inode_operations = {
@@ -3891,13 +3891,13 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx)
return 0;
}
-static int proc_task_getattr(struct user_namespace *mnt_userns,
+static int proc_task_getattr(struct mnt_idmap *idmap,
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
struct task_struct *p = get_proc_task(inode);
- generic_fillattr(&init_user_ns, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, inode, stat);
if (p) {
stat->nlink += get_nr_threads(p);
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index fc46d6fe080c..b3140deebbbf 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -12,6 +12,7 @@
#include <linux/file.h>
#include <linux/seq_file.h>
#include <linux/fs.h>
+#include <linux/filelock.h>
#include <linux/proc_fs.h>
@@ -325,13 +326,13 @@ static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry,
* /proc/pid/fd needs a special permission handler so that a process can still
* access /proc/self/fd after it has executed a setuid().
*/
-int proc_fd_permission(struct user_namespace *mnt_userns,
+int proc_fd_permission(struct mnt_idmap *idmap,
struct inode *inode, int mask)
{
struct task_struct *p;
int rv;
- rv = generic_permission(&init_user_ns, inode, mask);
+ rv = generic_permission(&nop_mnt_idmap, inode, mask);
if (rv == 0)
return rv;
@@ -344,14 +345,14 @@ int proc_fd_permission(struct user_namespace *mnt_userns,
return rv;
}
-static int proc_fd_getattr(struct user_namespace *mnt_userns,
+static int proc_fd_getattr(struct mnt_idmap *idmap,
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
int rv = 0;
- generic_fillattr(&init_user_ns, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, inode, stat);
/* If it's a directory, put the number of open fds there */
if (S_ISDIR(inode->i_mode)) {
diff --git a/fs/proc/fd.h b/fs/proc/fd.h
index c5a921a06a0b..7e7265f7e06f 100644
--- a/fs/proc/fd.h
+++ b/fs/proc/fd.h
@@ -10,7 +10,7 @@ extern const struct inode_operations proc_fd_inode_operations;
extern const struct file_operations proc_fdinfo_operations;
extern const struct inode_operations proc_fdinfo_inode_operations;
-extern int proc_fd_permission(struct user_namespace *mnt_userns,
+extern int proc_fd_permission(struct mnt_idmap *idmap,
struct inode *inode, int mask);
static inline unsigned int proc_fd(struct inode *inode)
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 587b91d9d998..8379593fa4bb 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -115,18 +115,18 @@ static bool pde_subdir_insert(struct proc_dir_entry *dir,
return true;
}
-static int proc_notify_change(struct user_namespace *mnt_userns,
+static int proc_notify_change(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *iattr)
{
struct inode *inode = d_inode(dentry);
struct proc_dir_entry *de = PDE(inode);
int error;
- error = setattr_prepare(&init_user_ns, dentry, iattr);
+ error = setattr_prepare(&nop_mnt_idmap, dentry, iattr);
if (error)
return error;
- setattr_copy(&init_user_ns, inode, iattr);
+ setattr_copy(&nop_mnt_idmap, inode, iattr);
mark_inode_dirty(inode);
proc_set_user(de, inode->i_uid, inode->i_gid);
@@ -134,7 +134,7 @@ static int proc_notify_change(struct user_namespace *mnt_userns,
return 0;
}
-static int proc_getattr(struct user_namespace *mnt_userns,
+static int proc_getattr(struct mnt_idmap *idmap,
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
@@ -147,7 +147,7 @@ static int proc_getattr(struct user_namespace *mnt_userns,
}
}
- generic_fillattr(&init_user_ns, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, inode, stat);
return 0;
}
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index b701d0207edf..9dda7e54b2d0 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -162,9 +162,9 @@ extern int proc_pid_statm(struct seq_file *, struct pid_namespace *,
* base.c
*/
extern const struct dentry_operations pid_dentry_operations;
-extern int pid_getattr(struct user_namespace *, const struct path *,
+extern int pid_getattr(struct mnt_idmap *, const struct path *,
struct kstat *, u32, unsigned int);
-extern int proc_setattr(struct user_namespace *, struct dentry *,
+extern int proc_setattr(struct mnt_idmap *, struct dentry *,
struct iattr *);
extern void proc_pid_evict_inode(struct proc_inode *);
extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *, umode_t);
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 856839b8ae8b..a0c0419872e3 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -299,7 +299,7 @@ static struct dentry *proc_tgid_net_lookup(struct inode *dir,
return de;
}
-static int proc_tgid_net_getattr(struct user_namespace *mnt_userns,
+static int proc_tgid_net_getattr(struct mnt_idmap *idmap,
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
@@ -308,7 +308,7 @@ static int proc_tgid_net_getattr(struct user_namespace *mnt_userns,
net = get_proc_task_net(inode);
- generic_fillattr(&init_user_ns, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, inode, stat);
if (net != NULL) {
stat->nlink = net->proc_net->nlink;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 48f2d60bd78a..e89bd8f1368b 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -798,7 +798,7 @@ out:
return 0;
}
-static int proc_sys_permission(struct user_namespace *mnt_userns,
+static int proc_sys_permission(struct mnt_idmap *idmap,
struct inode *inode, int mask)
{
/*
@@ -827,7 +827,7 @@ static int proc_sys_permission(struct user_namespace *mnt_userns,
return error;
}
-static int proc_sys_setattr(struct user_namespace *mnt_userns,
+static int proc_sys_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
@@ -836,16 +836,16 @@ static int proc_sys_setattr(struct user_namespace *mnt_userns,
if (attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID))
return -EPERM;
- error = setattr_prepare(&init_user_ns, dentry, attr);
+ error = setattr_prepare(&nop_mnt_idmap, dentry, attr);
if (error)
return error;
- setattr_copy(&init_user_ns, inode, attr);
+ setattr_copy(&nop_mnt_idmap, inode, attr);
mark_inode_dirty(inode);
return 0;
}
-static int proc_sys_getattr(struct user_namespace *mnt_userns,
+static int proc_sys_getattr(struct mnt_idmap *idmap,
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
@@ -856,7 +856,7 @@ static int proc_sys_getattr(struct user_namespace *mnt_userns,
if (IS_ERR(head))
return PTR_ERR(head);
- generic_fillattr(&init_user_ns, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, inode, stat);
if (table)
stat->mode = (stat->mode & S_IFMT) | table->mode;
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 3c2ee3eb1138..a86e65a608da 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -310,11 +310,11 @@ void __init proc_root_init(void)
register_filesystem(&proc_fs_type);
}
-static int proc_root_getattr(struct user_namespace *mnt_userns,
+static int proc_root_getattr(struct mnt_idmap *idmap,
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
- generic_fillattr(&init_user_ns, d_inode(path->dentry), stat);
+ generic_fillattr(&nop_mnt_idmap, d_inode(path->dentry), stat);
stat->nlink = proc_root.nlink + nr_processes();
return 0;
}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index e35a0398db63..af1c49ae11b1 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -745,9 +745,7 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
page = pfn_swap_entry_to_page(swpent);
}
if (page) {
- int mapcount = page_mapcount(page);
-
- if (mapcount >= 2)
+ if (page_mapcount(page) >= 2 || hugetlb_pmd_shared(pte))
mss->shared_hugetlb += huge_page_size(hstate_vma(vma));
else
mss->private_hugetlb += huge_page_size(hstate_vma(vma));
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index f27faf5db554..a6357f728034 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2085,7 +2085,7 @@ EXPORT_SYMBOL(__dquot_transfer);
/* Wrapper for transferring ownership of an inode for uid/gid only
* Called from FSXXX_setattr()
*/
-int dquot_transfer(struct user_namespace *mnt_userns, struct inode *inode,
+int dquot_transfer(struct mnt_idmap *idmap, struct inode *inode,
struct iattr *iattr)
{
struct dquot *transfer_to[MAXQUOTAS] = {};
@@ -2096,8 +2096,8 @@ int dquot_transfer(struct user_namespace *mnt_userns, struct inode *inode,
if (!dquot_active(inode))
return 0;
- if (i_uid_needs_update(mnt_userns, iattr, inode)) {
- kuid_t kuid = from_vfsuid(mnt_userns, i_user_ns(inode),
+ if (i_uid_needs_update(idmap, iattr, inode)) {
+ kuid_t kuid = from_vfsuid(idmap, i_user_ns(inode),
iattr->ia_vfsuid);
dquot = dqget(sb, make_kqid_uid(kuid));
@@ -2110,8 +2110,8 @@ int dquot_transfer(struct user_namespace *mnt_userns, struct inode *inode,
}
transfer_to[USRQUOTA] = dquot;
}
- if (i_gid_needs_update(mnt_userns, iattr, inode)) {
- kgid_t kgid = from_vfsgid(mnt_userns, i_user_ns(inode),
+ if (i_gid_needs_update(idmap, iattr, inode)) {
+ kgid_t kgid = from_vfsgid(idmap, i_user_ns(inode),
iattr->ia_vfsgid);
dquot = dqget(sb, make_kqid_gid(kgid));
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index cb240eac5036..5bf74c2f6042 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -22,7 +22,7 @@
#include <linux/uaccess.h>
#include "internal.h"
-static int ramfs_nommu_setattr(struct user_namespace *, struct dentry *, struct iattr *);
+static int ramfs_nommu_setattr(struct mnt_idmap *, struct dentry *, struct iattr *);
static unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
unsigned long addr,
unsigned long len,
@@ -158,7 +158,7 @@ static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
* handle a change of attributes
* - we're specifically interested in a change of size
*/
-static int ramfs_nommu_setattr(struct user_namespace *mnt_userns,
+static int ramfs_nommu_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *ia)
{
struct inode *inode = d_inode(dentry);
@@ -166,7 +166,7 @@ static int ramfs_nommu_setattr(struct user_namespace *mnt_userns,
int ret = 0;
/* POSIX UID/GID verification for setting inode attributes */
- ret = setattr_prepare(&init_user_ns, dentry, ia);
+ ret = setattr_prepare(&nop_mnt_idmap, dentry, ia);
if (ret)
return ret;
@@ -186,7 +186,7 @@ static int ramfs_nommu_setattr(struct user_namespace *mnt_userns,
}
}
- setattr_copy(&init_user_ns, inode, ia);
+ setattr_copy(&nop_mnt_idmap, inode, ia);
out:
ia->ia_valid = old_ia_valid;
return ret;
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index b3257e852820..5ba580c78835 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -61,7 +61,7 @@ struct inode *ramfs_get_inode(struct super_block *sb,
if (inode) {
inode->i_ino = get_next_ino();
- inode_init_owner(&init_user_ns, inode, dir, mode);
+ inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
inode->i_mapping->a_ops = &ram_aops;
mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
mapping_set_unevictable(inode->i_mapping);
@@ -95,7 +95,7 @@ struct inode *ramfs_get_inode(struct super_block *sb,
*/
/* SMP-safe */
static int
-ramfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+ramfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t dev)
{
struct inode * inode = ramfs_get_inode(dir->i_sb, dir, mode, dev);
@@ -110,22 +110,22 @@ ramfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
return error;
}
-static int ramfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int ramfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
- int retval = ramfs_mknod(&init_user_ns, dir, dentry, mode | S_IFDIR, 0);
+ int retval = ramfs_mknod(&nop_mnt_idmap, dir, dentry, mode | S_IFDIR, 0);
if (!retval)
inc_nlink(dir);
return retval;
}
-static int ramfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int ramfs_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
- return ramfs_mknod(&init_user_ns, dir, dentry, mode | S_IFREG, 0);
+ return ramfs_mknod(&nop_mnt_idmap, dir, dentry, mode | S_IFREG, 0);
}
-static int ramfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+static int ramfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *symname)
{
struct inode *inode;
@@ -145,7 +145,7 @@ static int ramfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
return error;
}
-static int ramfs_tmpfile(struct user_namespace *mnt_userns,
+static int ramfs_tmpfile(struct mnt_idmap *idmap,
struct inode *dir, struct file *file, umode_t mode)
{
struct inode *inode;
diff --git a/fs/reiserfs/acl.h b/fs/reiserfs/acl.h
index 29c503a06db4..2571b1a8be84 100644
--- a/fs/reiserfs/acl.h
+++ b/fs/reiserfs/acl.h
@@ -49,7 +49,7 @@ static inline int reiserfs_acl_count(size_t size)
#ifdef CONFIG_REISERFS_FS_POSIX_ACL
struct posix_acl *reiserfs_get_acl(struct inode *inode, int type, bool rcu);
-int reiserfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+int reiserfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type);
int reiserfs_acl_chmod(struct dentry *dentry);
int reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index c7d1fa526dea..d54cab854f60 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -3262,21 +3262,21 @@ static ssize_t reiserfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
return ret;
}
-int reiserfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int reiserfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
unsigned int ia_valid;
int error;
- error = setattr_prepare(&init_user_ns, dentry, attr);
+ error = setattr_prepare(&nop_mnt_idmap, dentry, attr);
if (error)
return error;
/* must be turned off for recursive notify_change calls */
ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
- if (is_quota_modification(mnt_userns, inode, attr)) {
+ if (is_quota_modification(&nop_mnt_idmap, inode, attr)) {
error = dquot_initialize(inode);
if (error)
return error;
@@ -3359,7 +3359,7 @@ int reiserfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
reiserfs_write_unlock(inode->i_sb);
if (error)
goto out;
- error = dquot_transfer(mnt_userns, inode, attr);
+ error = dquot_transfer(&nop_mnt_idmap, inode, attr);
reiserfs_write_lock(inode->i_sb);
if (error) {
journal_end(&th);
@@ -3398,7 +3398,7 @@ int reiserfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
}
if (!error) {
- setattr_copy(&init_user_ns, inode, attr);
+ setattr_copy(&nop_mnt_idmap, inode, attr);
mark_inode_dirty(inode);
}
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 4b86ecf5817e..6bf9b54e58ca 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -24,7 +24,7 @@ int reiserfs_fileattr_get(struct dentry *dentry, struct fileattr *fa)
return 0;
}
-int reiserfs_fileattr_set(struct user_namespace *mnt_userns,
+int reiserfs_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa)
{
struct inode *inode = d_inode(dentry);
@@ -96,7 +96,7 @@ long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
err = put_user(inode->i_generation, (int __user *)arg);
break;
case REISERFS_IOC_SETVERSION:
- if (!inode_owner_or_capable(&init_user_ns, inode)) {
+ if (!inode_owner_or_capable(&nop_mnt_idmap, inode)) {
err = -EPERM;
break;
}
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 0b8aa99749f1..42d2c20e1345 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -616,11 +616,11 @@ static int new_inode_init(struct inode *inode, struct inode *dir, umode_t mode)
* the quota init calls have to know who to charge the quota to, so
* we have to set uid and gid here
*/
- inode_init_owner(&init_user_ns, inode, dir, mode);
+ inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
return dquot_initialize(inode);
}
-static int reiserfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int reiserfs_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
int retval;
@@ -700,7 +700,7 @@ out_failed:
return retval;
}
-static int reiserfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+static int reiserfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t rdev)
{
int retval;
@@ -784,7 +784,7 @@ out_failed:
return retval;
}
-static int reiserfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int reiserfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
int retval;
@@ -1099,7 +1099,7 @@ out_unlink:
return retval;
}
-static int reiserfs_symlink(struct user_namespace *mnt_userns,
+static int reiserfs_symlink(struct mnt_idmap *idmap,
struct inode *parent_dir, struct dentry *dentry,
const char *symname)
{
@@ -1311,7 +1311,7 @@ static void set_ino_in_dir_entry(struct reiserfs_dir_entry *de,
* one path. If it holds 2 or more, it can get into endless waiting in
* get_empty_nodes or its clones
*/
-static int reiserfs_rename(struct user_namespace *mnt_userns,
+static int reiserfs_rename(struct mnt_idmap *idmap,
struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h
index 3aa928ec527a..98e6f53c2fe0 100644
--- a/fs/reiserfs/reiserfs.h
+++ b/fs/reiserfs/reiserfs.h
@@ -3100,7 +3100,7 @@ static inline void reiserfs_update_sd(struct reiserfs_transaction_handle *th,
}
void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode);
-int reiserfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int reiserfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr);
int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len);
@@ -3407,7 +3407,7 @@ __u32 r5_hash(const signed char *msg, int len);
/* prototypes from ioctl.c */
int reiserfs_fileattr_get(struct dentry *dentry, struct fileattr *fa);
-int reiserfs_fileattr_set(struct user_namespace *mnt_userns,
+int reiserfs_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa);
long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
long reiserfs_compat_ioctl(struct file *filp,
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 8b2d52443f41..06d810c72c52 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -66,14 +66,14 @@
static int xattr_create(struct inode *dir, struct dentry *dentry, int mode)
{
BUG_ON(!inode_is_locked(dir));
- return dir->i_op->create(&init_user_ns, dir, dentry, mode, true);
+ return dir->i_op->create(&nop_mnt_idmap, dir, dentry, mode, true);
}
#endif
static int xattr_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
BUG_ON(!inode_is_locked(dir));
- return dir->i_op->mkdir(&init_user_ns, dir, dentry, mode);
+ return dir->i_op->mkdir(&nop_mnt_idmap, dir, dentry, mode);
}
/*
@@ -352,7 +352,7 @@ static int chown_one_xattr(struct dentry *dentry, void *data)
* ATTR_MODE is set.
*/
attrs->ia_valid &= (ATTR_UID|ATTR_GID);
- err = reiserfs_setattr(&init_user_ns, dentry, attrs);
+ err = reiserfs_setattr(&nop_mnt_idmap, dentry, attrs);
attrs->ia_valid = ia_valid;
return err;
@@ -597,7 +597,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
inode_lock_nested(d_inode(dentry), I_MUTEX_XATTR);
inode_dio_wait(d_inode(dentry));
- err = reiserfs_setattr(&init_user_ns, dentry, &newattrs);
+ err = reiserfs_setattr(&nop_mnt_idmap, dentry, &newattrs);
inode_unlock(d_inode(dentry));
} else
update_ctime(inode);
@@ -941,7 +941,7 @@ static int xattr_mount_check(struct super_block *s)
return 0;
}
-int reiserfs_permission(struct user_namespace *mnt_userns, struct inode *inode,
+int reiserfs_permission(struct mnt_idmap *idmap, struct inode *inode,
int mask)
{
/*
@@ -951,7 +951,7 @@ int reiserfs_permission(struct user_namespace *mnt_userns, struct inode *inode,
if (IS_PRIVATE(inode))
return 0;
- return generic_permission(&init_user_ns, inode, mask);
+ return generic_permission(&nop_mnt_idmap, inode, mask);
}
static int xattr_hide_revalidate(struct dentry *dentry, unsigned int flags)
diff --git a/fs/reiserfs/xattr.h b/fs/reiserfs/xattr.h
index e47fde1182de..5868a4e990e3 100644
--- a/fs/reiserfs/xattr.h
+++ b/fs/reiserfs/xattr.h
@@ -16,7 +16,7 @@ int reiserfs_xattr_init(struct super_block *sb, int mount_flags);
int reiserfs_lookup_privroot(struct super_block *sb);
int reiserfs_delete_xattrs(struct inode *inode);
int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs);
-int reiserfs_permission(struct user_namespace *mnt_userns,
+int reiserfs_permission(struct mnt_idmap *idmap,
struct inode *inode, int mask);
#ifdef CONFIG_REISERFS_FS_XATTR
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 93fe414fed18..138060452678 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -18,7 +18,7 @@ static int __reiserfs_set_acl(struct reiserfs_transaction_handle *th,
int
-reiserfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+reiserfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type)
{
int error, error2;
@@ -42,7 +42,7 @@ reiserfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
reiserfs_write_unlock(inode->i_sb);
if (error == 0) {
if (type == ACL_TYPE_ACCESS && acl) {
- error = posix_acl_update_mode(&init_user_ns, inode,
+ error = posix_acl_update_mode(&nop_mnt_idmap, inode,
&mode, &acl);
if (error)
goto unlock;
@@ -407,5 +407,5 @@ int reiserfs_acl_chmod(struct dentry *dentry)
!reiserfs_posixacl(inode->i_sb))
return 0;
- return posix_acl_chmod(&init_user_ns, dentry, inode->i_mode);
+ return posix_acl_chmod(&nop_mnt_idmap, dentry, inode->i_mode);
}
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index 857a65b05726..41c0ea84fbff 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -22,7 +22,7 @@ security_get(const struct xattr_handler *handler, struct dentry *unused,
static int
security_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns, struct dentry *unused,
+ struct mnt_idmap *idmap, struct dentry *unused,
struct inode *inode, const char *name, const void *buffer,
size_t size, int flags)
{
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index d853cea2afcd..0c0c74d8db0e 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -21,7 +21,7 @@ trusted_get(const struct xattr_handler *handler, struct dentry *unused,
static int
trusted_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns, struct dentry *unused,
+ struct mnt_idmap *idmap, struct dentry *unused,
struct inode *inode, const char *name, const void *buffer,
size_t size, int flags)
{
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index 65d9cd10a5ea..88195181e1d7 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -18,7 +18,7 @@ user_get(const struct xattr_handler *handler, struct dentry *unused,
}
static int
-user_set(const struct xattr_handler *handler, struct user_namespace *mnt_userns,
+user_set(const struct xattr_handler *handler, struct mnt_idmap *idmap,
struct dentry *unused,
struct inode *inode, const char *name, const void *buffer,
size_t size, int flags)
diff --git a/fs/remap_range.c b/fs/remap_range.c
index 41f60477bb41..1331a890f2f2 100644
--- a/fs/remap_range.c
+++ b/fs/remap_range.c
@@ -419,16 +419,16 @@ EXPORT_SYMBOL(vfs_clone_file_range);
/* Check whether we are allowed to dedupe the destination file */
static bool allow_file_dedupe(struct file *file)
{
- struct user_namespace *mnt_userns = file_mnt_user_ns(file);
+ struct mnt_idmap *idmap = file_mnt_idmap(file);
struct inode *inode = file_inode(file);
if (capable(CAP_SYS_ADMIN))
return true;
if (file->f_mode & FMODE_WRITE)
return true;
- if (vfsuid_eq_kuid(i_uid_into_vfsuid(mnt_userns, inode), current_fsuid()))
+ if (vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), current_fsuid()))
return true;
- if (!inode_permission(mnt_userns, inode, MAY_WRITE))
+ if (!inode_permission(idmap, inode, MAY_WRITE))
return true;
return false;
}
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index b3fdc8212c5f..95f8e8901768 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -183,7 +183,7 @@ static inline int squashfs_block_size(__le32 raw)
#define SQUASHFS_ID_BLOCK_BYTES(A) (SQUASHFS_ID_BLOCKS(A) *\
sizeof(u64))
/* xattr id lookup table defines */
-#define SQUASHFS_XATTR_BYTES(A) ((A) * sizeof(struct squashfs_xattr_id))
+#define SQUASHFS_XATTR_BYTES(A) (((u64) (A)) * sizeof(struct squashfs_xattr_id))
#define SQUASHFS_XATTR_BLOCK(A) (SQUASHFS_XATTR_BYTES(A) / \
SQUASHFS_METADATA_SIZE)
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index 659082e9e51d..72f6f4b37863 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -63,7 +63,7 @@ struct squashfs_sb_info {
long long bytes_used;
unsigned int inodes;
unsigned int fragments;
- int xattr_ids;
+ unsigned int xattr_ids;
unsigned int ids;
bool panic_on_errors;
const struct squashfs_decompressor_thread_ops *thread_ops;
diff --git a/fs/squashfs/xattr.h b/fs/squashfs/xattr.h
index d8a270d3ac4c..f1a463d8bfa0 100644
--- a/fs/squashfs/xattr.h
+++ b/fs/squashfs/xattr.h
@@ -10,12 +10,12 @@
#ifdef CONFIG_SQUASHFS_XATTR
extern __le64 *squashfs_read_xattr_id_table(struct super_block *, u64,
- u64 *, int *);
+ u64 *, unsigned int *);
extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *,
unsigned int *, unsigned long long *);
#else
static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb,
- u64 start, u64 *xattr_table_start, int *xattr_ids)
+ u64 start, u64 *xattr_table_start, unsigned int *xattr_ids)
{
struct squashfs_xattr_id_table *id_table;
diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c
index 087cab8c78f4..c8469c656e0d 100644
--- a/fs/squashfs/xattr_id.c
+++ b/fs/squashfs/xattr_id.c
@@ -56,7 +56,7 @@ int squashfs_xattr_lookup(struct super_block *sb, unsigned int index,
* Read uncompressed xattr id lookup table indexes from disk into memory
*/
__le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 table_start,
- u64 *xattr_table_start, int *xattr_ids)
+ u64 *xattr_table_start, unsigned int *xattr_ids)
{
struct squashfs_sb_info *msblk = sb->s_fs_info;
unsigned int len, indexes;
diff --git a/fs/stat.c b/fs/stat.c
index d6cc74ca8486..7c238da22ef0 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -18,6 +18,7 @@
#include <linux/syscalls.h>
#include <linux/pagemap.h>
#include <linux/compat.h>
+#include <linux/iversion.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
@@ -27,7 +28,7 @@
/**
* generic_fillattr - Fill in the basic attributes from the inode struct
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @inode: Inode to use as the source
* @stat: Where to fill in the attributes
*
@@ -35,17 +36,17 @@
* found on the VFS inode structure. This is the default if no getattr inode
* operation is supplied.
*
- * If the inode has been found through an idmapped mount the user namespace of
- * the vfsmount must be passed through @mnt_userns. This function will then
- * take care to map the inode according to @mnt_userns before filling in the
+ * If the inode has been found through an idmapped mount the idmap of
+ * the vfsmount must be passed through @idmap. This function will then
+ * take care to map the inode according to @idmap before filling in the
* uid and gid filds. On non-idmapped mounts or if permission checking is to be
- * performed on the raw inode simply passs init_user_ns.
+ * performed on the raw inode simply passs @nop_mnt_idmap.
*/
-void generic_fillattr(struct user_namespace *mnt_userns, struct inode *inode,
+void generic_fillattr(struct mnt_idmap *idmap, struct inode *inode,
struct kstat *stat)
{
- vfsuid_t vfsuid = i_uid_into_vfsuid(mnt_userns, inode);
- vfsgid_t vfsgid = i_gid_into_vfsgid(mnt_userns, inode);
+ vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode);
+ vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode);
stat->dev = inode->i_sb->s_dev;
stat->ino = inode->i_ino;
@@ -97,7 +98,7 @@ EXPORT_SYMBOL(generic_fill_statx_attr);
int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
- struct user_namespace *mnt_userns;
+ struct mnt_idmap *idmap;
struct inode *inode = d_backing_inode(path->dentry);
memset(stat, 0, sizeof(*stat));
@@ -122,12 +123,17 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
stat->attributes_mask |= (STATX_ATTR_AUTOMOUNT |
STATX_ATTR_DAX);
- mnt_userns = mnt_user_ns(path->mnt);
+ if ((request_mask & STATX_CHANGE_COOKIE) && IS_I_VERSION(inode)) {
+ stat->result_mask |= STATX_CHANGE_COOKIE;
+ stat->change_cookie = inode_query_iversion(inode);
+ }
+
+ idmap = mnt_idmap(path->mnt);
if (inode->i_op->getattr)
- return inode->i_op->getattr(mnt_userns, path, stat,
+ return inode->i_op->getattr(idmap, path, stat,
request_mask, query_flags);
- generic_fillattr(mnt_userns, inode, stat);
+ generic_fillattr(idmap, inode, stat);
return 0;
}
EXPORT_SYMBOL(vfs_getattr_nosec);
@@ -602,9 +608,11 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
memset(&tmp, 0, sizeof(tmp));
- tmp.stx_mask = stat->result_mask;
+ /* STATX_CHANGE_COOKIE is kernel-only for now */
+ tmp.stx_mask = stat->result_mask & ~STATX_CHANGE_COOKIE;
tmp.stx_blksize = stat->blksize;
- tmp.stx_attributes = stat->attributes;
+ /* STATX_ATTR_CHANGE_MONOTONIC is kernel-only for now */
+ tmp.stx_attributes = stat->attributes & ~STATX_ATTR_CHANGE_MONOTONIC;
tmp.stx_nlink = stat->nlink;
tmp.stx_uid = from_kuid_munged(current_user_ns(), stat->uid);
tmp.stx_gid = from_kgid_munged(current_user_ns(), stat->gid);
@@ -643,6 +651,11 @@ int do_statx(int dfd, struct filename *filename, unsigned int flags,
if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE)
return -EINVAL;
+ /* STATX_CHANGE_COOKIE is kernel-only for now. Ignore requests
+ * from userland.
+ */
+ mask &= ~STATX_CHANGE_COOKIE;
+
error = vfs_statx(dfd, filename, flags, &stat, mask);
if (error)
return error;
diff --git a/fs/super.c b/fs/super.c
index 12c08cb20405..8e531174e7c2 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -291,7 +291,6 @@ static void __put_super(struct super_block *s)
WARN_ON(s->s_inode_lru.node);
WARN_ON(!list_empty(&s->s_mounts));
security_sb_free(s);
- fscrypt_destroy_keyring(s);
put_user_ns(s->s_user_ns);
kfree(s->s_subtype);
call_rcu(&s->rcu, destroy_super_rcu);
@@ -491,10 +490,23 @@ void generic_shutdown_super(struct super_block *sb)
if (sop->put_super)
sop->put_super(sb);
- if (!list_empty(&sb->s_inodes)) {
- printk("VFS: Busy inodes after unmount of %s. "
- "Self-destruct in 5 seconds. Have a nice day...\n",
- sb->s_id);
+ if (CHECK_DATA_CORRUPTION(!list_empty(&sb->s_inodes),
+ "VFS: Busy inodes after unmount of %s (%s)",
+ sb->s_id, sb->s_type->name)) {
+ /*
+ * Adding a proper bailout path here would be hard, but
+ * we can at least make it more likely that a later
+ * iput_final() or such crashes cleanly.
+ */
+ struct inode *inode;
+
+ spin_lock(&sb->s_inode_list_lock);
+ list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
+ inode->i_op = VFS_PTR_POISON;
+ inode->i_sb = VFS_PTR_POISON;
+ inode->i_mapping = VFS_PTR_POISON;
+ }
+ spin_unlock(&sb->s_inode_list_lock);
}
}
spin_lock(&sb_lock);
diff --git a/fs/sysv/file.c b/fs/sysv/file.c
index 90e00124ea07..50eb92557a0f 100644
--- a/fs/sysv/file.c
+++ b/fs/sysv/file.c
@@ -29,13 +29,13 @@ const struct file_operations sysv_file_operations = {
.splice_read = generic_file_splice_read,
};
-static int sysv_setattr(struct user_namespace *mnt_userns,
+static int sysv_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
int error;
- error = setattr_prepare(&init_user_ns, dentry, attr);
+ error = setattr_prepare(&nop_mnt_idmap, dentry, attr);
if (error)
return error;
@@ -48,7 +48,7 @@ static int sysv_setattr(struct user_namespace *mnt_userns,
sysv_truncate(inode);
}
- setattr_copy(&init_user_ns, inode, attr);
+ setattr_copy(&nop_mnt_idmap, inode, attr);
mark_inode_dirty(inode);
return 0;
}
diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c
index 50df794a3c1f..e732879036ab 100644
--- a/fs/sysv/ialloc.c
+++ b/fs/sysv/ialloc.c
@@ -163,7 +163,7 @@ struct inode * sysv_new_inode(const struct inode * dir, umode_t mode)
*sbi->s_sb_fic_count = cpu_to_fs16(sbi, count);
fs16_add(sbi, sbi->s_sb_total_free_inodes, -1);
dirty_sb(sb);
- inode_init_owner(&init_user_ns, inode, dir, mode);
+ inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
inode->i_ino = fs16_to_cpu(sbi, ino);
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
inode->i_blocks = 0;
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index 3b8567564e7e..b22764fe669c 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -441,11 +441,11 @@ static unsigned sysv_nblocks(struct super_block *s, loff_t size)
return res;
}
-int sysv_getattr(struct user_namespace *mnt_userns, const struct path *path,
+int sysv_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int flags)
{
struct super_block *s = path->dentry->d_sb;
- generic_fillattr(&init_user_ns, d_inode(path->dentry), stat);
+ generic_fillattr(&nop_mnt_idmap, d_inode(path->dentry), stat);
stat->blocks = (s->s_blocksize / 512) * sysv_nblocks(s, stat->size);
stat->blksize = s->s_blocksize;
return 0;
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index b2e6abc06a2d..ecd424461511 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -41,7 +41,7 @@ static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, un
return d_splice_alias(inode, dentry);
}
-static int sysv_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+static int sysv_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct inode * inode;
@@ -61,13 +61,13 @@ static int sysv_mknod(struct user_namespace *mnt_userns, struct inode *dir,
return err;
}
-static int sysv_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int sysv_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
- return sysv_mknod(&init_user_ns, dir, dentry, mode, 0);
+ return sysv_mknod(&nop_mnt_idmap, dir, dentry, mode, 0);
}
-static int sysv_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+static int sysv_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *symname)
{
int err = -ENAMETOOLONG;
@@ -110,7 +110,7 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir,
return add_nondir(dentry, inode);
}
-static int sysv_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int sysv_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
struct inode * inode;
@@ -189,7 +189,7 @@ static int sysv_rmdir(struct inode * dir, struct dentry * dentry)
* Anybody can rename anything with this: the permission checks are left to the
* higher-level routines.
*/
-static int sysv_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+static int sysv_rename(struct mnt_idmap *idmap, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags)
{
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 99ddf033da4f..5e122a5673c1 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -141,7 +141,7 @@ extern struct inode *sysv_iget(struct super_block *, unsigned int);
extern int sysv_write_inode(struct inode *, struct writeback_control *wbc);
extern int sysv_sync_inode(struct inode *);
extern void sysv_set_inode(struct inode *, dev_t);
-extern int sysv_getattr(struct user_namespace *, const struct path *,
+extern int sysv_getattr(struct mnt_idmap *, const struct path *,
struct kstat *, u32, unsigned int);
extern int sysv_init_icache(void);
extern void sysv_destroy_icache(void);
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index da85b3979195..57ac8aa4a724 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -67,7 +67,7 @@ static char *get_dname(struct dentry *dentry)
return name;
}
-static int tracefs_syscall_mkdir(struct user_namespace *mnt_userns,
+static int tracefs_syscall_mkdir(struct mnt_idmap *idmap,
struct inode *inode, struct dentry *dentry,
umode_t mode)
{
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 0f29cf201136..1e92c1730c16 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -95,7 +95,7 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir,
*/
inode->i_flags |= S_NOCMTIME;
- inode_init_owner(&init_user_ns, inode, dir, mode);
+ inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
inode->i_mtime = inode->i_atime = inode->i_ctime =
current_time(inode);
inode->i_mapping->nrpages = 0;
@@ -283,7 +283,7 @@ static int ubifs_prepare_create(struct inode *dir, struct dentry *dentry,
return fscrypt_setup_filename(dir, &dentry->d_name, 0, nm);
}
-static int ubifs_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int ubifs_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
struct inode *inode;
@@ -426,7 +426,7 @@ static void unlock_2_inodes(struct inode *inode1, struct inode *inode2)
mutex_unlock(&ubifs_inode(inode1)->ui_mutex);
}
-static int ubifs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+static int ubifs_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
struct file *file, umode_t mode)
{
struct dentry *dentry = file->f_path.dentry;
@@ -979,7 +979,7 @@ out_fname:
return err;
}
-static int ubifs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int ubifs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
struct inode *inode;
@@ -1052,7 +1052,7 @@ out_budg:
return err;
}
-static int ubifs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+static int ubifs_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct inode *inode;
@@ -1141,7 +1141,7 @@ out_budg:
return err;
}
-static int ubifs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+static int ubifs_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *symname)
{
struct inode *inode;
@@ -1606,7 +1606,7 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry,
return err;
}
-static int ubifs_rename(struct user_namespace *mnt_userns,
+static int ubifs_rename(struct mnt_idmap *idmap,
struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
@@ -1631,7 +1631,7 @@ static int ubifs_rename(struct user_namespace *mnt_userns,
return do_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
}
-int ubifs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+int ubifs_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int flags)
{
loff_t size;
@@ -1654,7 +1654,7 @@ int ubifs_getattr(struct user_namespace *mnt_userns, const struct path *path,
STATX_ATTR_ENCRYPTED |
STATX_ATTR_IMMUTABLE);
- generic_fillattr(&init_user_ns, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, inode, stat);
stat->blksize = UBIFS_BLOCK_SIZE;
stat->size = ui->ui_size;
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index f2353dd676ef..8cb5d76b301c 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1258,7 +1258,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode,
return err;
}
-int ubifs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int ubifs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
int err;
@@ -1267,7 +1267,7 @@ int ubifs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
dbg_gen("ino %lu, mode %#x, ia_valid %#x",
inode->i_ino, inode->i_mode, attr->ia_valid);
- err = setattr_prepare(&init_user_ns, dentry, attr);
+ err = setattr_prepare(&nop_mnt_idmap, dentry, attr);
if (err)
return err;
@@ -1608,11 +1608,11 @@ static const char *ubifs_get_link(struct dentry *dentry,
return fscrypt_get_symlink(inode, ui->data, ui->data_len, done);
}
-static int ubifs_symlink_getattr(struct user_namespace *mnt_userns,
+static int ubifs_symlink_getattr(struct mnt_idmap *idmap,
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
- ubifs_getattr(mnt_userns, path, stat, request_mask, query_flags);
+ ubifs_getattr(idmap, path, stat, request_mask, query_flags);
if (IS_ENCRYPTED(d_inode(path->dentry)))
return fscrypt_symlink_getattr(path, stat);
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index 71bcebe45f9c..67c5108abd89 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -144,7 +144,7 @@ int ubifs_fileattr_get(struct dentry *dentry, struct fileattr *fa)
return 0;
}
-int ubifs_fileattr_set(struct user_namespace *mnt_userns,
+int ubifs_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa)
{
struct inode *inode = d_inode(dentry);
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 478bbbb5382f..9063b73536f8 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -2020,15 +2020,15 @@ int ubifs_calc_dark(const struct ubifs_info *c, int spc);
/* file.c */
int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync);
-int ubifs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int ubifs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr);
int ubifs_update_time(struct inode *inode, struct timespec64 *time, int flags);
/* dir.c */
struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir,
umode_t mode, bool is_xattr);
-int ubifs_getattr(struct user_namespace *mnt_userns, const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int flags);
+int ubifs_getattr(struct mnt_idmap *idmap, const struct path *path,
+ struct kstat *stat, u32 request_mask, unsigned int flags);
int ubifs_check_dir_empty(struct inode *dir);
/* xattr.c */
@@ -2085,7 +2085,7 @@ void ubifs_destroy_size_tree(struct ubifs_info *c);
/* ioctl.c */
int ubifs_fileattr_get(struct dentry *dentry, struct fileattr *fa);
-int ubifs_fileattr_set(struct user_namespace *mnt_userns,
+int ubifs_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa);
long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
void ubifs_set_inode_flags(struct inode *inode);
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 3db8486e3725..349228dd1191 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -699,7 +699,7 @@ static int xattr_get(const struct xattr_handler *handler,
}
static int xattr_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *dentry, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 8e597db4d971..14b9db4c80f0 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -36,18 +36,41 @@ static int read_block_bitmap(struct super_block *sb,
unsigned long bitmap_nr)
{
struct buffer_head *bh = NULL;
- int retval = 0;
+ int i;
+ int max_bits, off, count;
struct kernel_lb_addr loc;
loc.logicalBlockNum = bitmap->s_extPosition;
loc.partitionReferenceNum = UDF_SB(sb)->s_partition;
- bh = udf_tread(sb, udf_get_lb_pblock(sb, &loc, block));
+ bh = sb_bread(sb, udf_get_lb_pblock(sb, &loc, block));
+ bitmap->s_block_bitmap[bitmap_nr] = bh;
if (!bh)
- retval = -EIO;
+ return -EIO;
- bitmap->s_block_bitmap[bitmap_nr] = bh;
- return retval;
+ /* Check consistency of Space Bitmap buffer. */
+ max_bits = sb->s_blocksize * 8;
+ if (!bitmap_nr) {
+ off = sizeof(struct spaceBitmapDesc) << 3;
+ count = min(max_bits - off, bitmap->s_nr_groups);
+ } else {
+ /*
+ * Rough check if bitmap number is too big to have any bitmap
+ * blocks reserved.
+ */
+ if (bitmap_nr >
+ (bitmap->s_nr_groups >> (sb->s_blocksize_bits + 3)) + 2)
+ return 0;
+ off = 0;
+ count = bitmap->s_nr_groups - bitmap_nr * max_bits +
+ (sizeof(struct spaceBitmapDesc) << 3);
+ count = min(count, max_bits);
+ }
+
+ for (i = 0; i < count; i++)
+ if (udf_test_bit(i + off, bh->b_data))
+ return -EFSCORRUPTED;
+ return 0;
}
static int __load_block_bitmap(struct super_block *sb,
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index be640f4b2f2c..212393b12c22 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -39,26 +39,13 @@
static int udf_readdir(struct file *file, struct dir_context *ctx)
{
struct inode *dir = file_inode(file);
- struct udf_inode_info *iinfo = UDF_I(dir);
- struct udf_fileident_bh fibh = { .sbh = NULL, .ebh = NULL};
- struct fileIdentDesc *fi = NULL;
- struct fileIdentDesc cfi;
- udf_pblk_t block, iblock;
loff_t nf_pos, emit_pos = 0;
int flen;
- unsigned char *fname = NULL, *copy_name = NULL;
- unsigned char *nameptr;
- uint16_t liu;
- uint8_t lfi;
- loff_t size = udf_ext0_offset(dir) + dir->i_size;
- struct buffer_head *tmp, *bha[16];
- struct kernel_lb_addr eloc;
- uint32_t elen;
- sector_t offset;
- int i, num, ret = 0;
- struct extent_position epos = { NULL, 0, {0, 0} };
+ unsigned char *fname = NULL;
+ int ret = 0;
struct super_block *sb = dir->i_sb;
bool pos_valid = false;
+ struct udf_fileident_iter iter;
if (ctx->pos == 0) {
if (!dir_emit_dot(file, ctx))
@@ -66,7 +53,7 @@ static int udf_readdir(struct file *file, struct dir_context *ctx)
ctx->pos = 1;
}
nf_pos = (ctx->pos - 1) << 2;
- if (nf_pos >= size)
+ if (nf_pos >= dir->i_size)
goto out;
/*
@@ -90,138 +77,57 @@ static int udf_readdir(struct file *file, struct dir_context *ctx)
goto out;
}
- if (nf_pos == 0)
- nf_pos = udf_ext0_offset(dir);
-
- fibh.soffset = fibh.eoffset = nf_pos & (sb->s_blocksize - 1);
- if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
- if (inode_bmap(dir, nf_pos >> sb->s_blocksize_bits,
- &epos, &eloc, &elen, &offset)
- != (EXT_RECORDED_ALLOCATED >> 30)) {
- ret = -ENOENT;
- goto out;
- }
- block = udf_get_lb_pblock(sb, &eloc, offset);
- if ((++offset << sb->s_blocksize_bits) < elen) {
- if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
- epos.offset -= sizeof(struct short_ad);
- else if (iinfo->i_alloc_type ==
- ICBTAG_FLAG_AD_LONG)
- epos.offset -= sizeof(struct long_ad);
- } else {
- offset = 0;
- }
-
- if (!(fibh.sbh = fibh.ebh = udf_tread(sb, block))) {
- ret = -EIO;
- goto out;
- }
-
- if (!(offset & ((16 >> (sb->s_blocksize_bits - 9)) - 1))) {
- i = 16 >> (sb->s_blocksize_bits - 9);
- if (i + offset > (elen >> sb->s_blocksize_bits))
- i = (elen >> sb->s_blocksize_bits) - offset;
- for (num = 0; i > 0; i--) {
- block = udf_get_lb_pblock(sb, &eloc, offset + i);
- tmp = udf_tgetblk(sb, block);
- if (tmp && !buffer_uptodate(tmp) && !buffer_locked(tmp))
- bha[num++] = tmp;
- else
- brelse(tmp);
- }
- if (num) {
- bh_readahead_batch(num, bha, REQ_RAHEAD);
- for (i = 0; i < num; i++)
- brelse(bha[i]);
- }
- }
- }
-
- while (nf_pos < size) {
+ for (ret = udf_fiiter_init(&iter, dir, nf_pos);
+ !ret && iter.pos < dir->i_size;
+ ret = udf_fiiter_advance(&iter)) {
struct kernel_lb_addr tloc;
- loff_t cur_pos = nf_pos;
+ udf_pblk_t iblock;
- /* Update file position only if we got past the current one */
- if (nf_pos >= emit_pos) {
- ctx->pos = (nf_pos >> 2) + 1;
- pos_valid = true;
- }
-
- fi = udf_fileident_read(dir, &nf_pos, &fibh, &cfi, &epos, &eloc,
- &elen, &offset);
- if (!fi)
- goto out;
/* Still not at offset where user asked us to read from? */
- if (cur_pos < emit_pos)
+ if (iter.pos < emit_pos)
continue;
- liu = le16_to_cpu(cfi.lengthOfImpUse);
- lfi = cfi.lengthFileIdent;
-
- if (fibh.sbh == fibh.ebh) {
- nameptr = udf_get_fi_ident(fi);
- } else {
- int poffset; /* Unpaded ending offset */
-
- poffset = fibh.soffset + sizeof(struct fileIdentDesc) + liu + lfi;
-
- if (poffset >= lfi) {
- nameptr = (char *)(fibh.ebh->b_data + poffset - lfi);
- } else {
- if (!copy_name) {
- copy_name = kmalloc(UDF_NAME_LEN,
- GFP_NOFS);
- if (!copy_name) {
- ret = -ENOMEM;
- goto out;
- }
- }
- nameptr = copy_name;
- memcpy(nameptr, udf_get_fi_ident(fi),
- lfi - poffset);
- memcpy(nameptr + lfi - poffset,
- fibh.ebh->b_data, poffset);
- }
- }
+ /* Update file position only if we got past the current one */
+ pos_valid = true;
+ ctx->pos = (iter.pos >> 2) + 1;
- if ((cfi.fileCharacteristics & FID_FILE_CHAR_DELETED) != 0) {
+ if (iter.fi.fileCharacteristics & FID_FILE_CHAR_DELETED) {
if (!UDF_QUERY_FLAG(sb, UDF_FLAG_UNDELETE))
continue;
}
- if ((cfi.fileCharacteristics & FID_FILE_CHAR_HIDDEN) != 0) {
+ if (iter.fi.fileCharacteristics & FID_FILE_CHAR_HIDDEN) {
if (!UDF_QUERY_FLAG(sb, UDF_FLAG_UNHIDE))
continue;
}
- if (cfi.fileCharacteristics & FID_FILE_CHAR_PARENT) {
+ if (iter.fi.fileCharacteristics & FID_FILE_CHAR_PARENT) {
if (!dir_emit_dotdot(file, ctx))
- goto out;
+ goto out_iter;
continue;
}
- flen = udf_get_filename(sb, nameptr, lfi, fname, UDF_NAME_LEN);
+ flen = udf_get_filename(sb, iter.name,
+ iter.fi.lengthFileIdent, fname, UDF_NAME_LEN);
if (flen < 0)
continue;
- tloc = lelb_to_cpu(cfi.icb.extLocation);
+ tloc = lelb_to_cpu(iter.fi.icb.extLocation);
iblock = udf_get_lb_pblock(sb, &tloc, 0);
if (!dir_emit(ctx, fname, flen, iblock, DT_UNKNOWN))
- goto out;
- } /* end while */
-
- ctx->pos = (nf_pos >> 2) + 1;
- pos_valid = true;
+ goto out_iter;
+ }
+ if (!ret) {
+ ctx->pos = (iter.pos >> 2) + 1;
+ pos_valid = true;
+ }
+out_iter:
+ udf_fiiter_release(&iter);
out:
if (pos_valid)
file->f_version = inode_query_iversion(dir);
- if (fibh.sbh != fibh.ebh)
- brelse(fibh.ebh);
- brelse(fibh.sbh);
- brelse(epos.bh);
kfree(fname);
- kfree(copy_name);
return ret;
}
diff --git a/fs/udf/directory.c b/fs/udf/directory.c
index 16bcf2c6b8b3..654536d2b609 100644
--- a/fs/udf/directory.c
+++ b/fs/udf/directory.c
@@ -17,183 +17,478 @@
#include <linux/fs.h>
#include <linux/string.h>
#include <linux/bio.h>
+#include <linux/crc-itu-t.h>
+#include <linux/iversion.h>
-struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos,
- struct udf_fileident_bh *fibh,
- struct fileIdentDesc *cfi,
- struct extent_position *epos,
- struct kernel_lb_addr *eloc, uint32_t *elen,
- sector_t *offset)
+static int udf_verify_fi(struct udf_fileident_iter *iter)
{
- struct fileIdentDesc *fi;
- int i, num;
- udf_pblk_t block;
- struct buffer_head *tmp, *bha[16];
- struct udf_inode_info *iinfo = UDF_I(dir);
-
- fibh->soffset = fibh->eoffset;
+ unsigned int len;
+
+ if (iter->fi.descTag.tagIdent != cpu_to_le16(TAG_IDENT_FID)) {
+ udf_err(iter->dir->i_sb,
+ "directory (ino %lu) has entry at pos %llu with incorrect tag %x\n",
+ iter->dir->i_ino, (unsigned long long)iter->pos,
+ le16_to_cpu(iter->fi.descTag.tagIdent));
+ return -EFSCORRUPTED;
+ }
+ len = udf_dir_entry_len(&iter->fi);
+ if (le16_to_cpu(iter->fi.lengthOfImpUse) & 3) {
+ udf_err(iter->dir->i_sb,
+ "directory (ino %lu) has entry at pos %llu with unaligned length of impUse field\n",
+ iter->dir->i_ino, (unsigned long long)iter->pos);
+ return -EFSCORRUPTED;
+ }
+ /*
+ * This is in fact allowed by the spec due to long impUse field but
+ * we don't support it. If there is real media with this large impUse
+ * field, support can be added.
+ */
+ if (len > 1 << iter->dir->i_blkbits) {
+ udf_err(iter->dir->i_sb,
+ "directory (ino %lu) has too big (%u) entry at pos %llu\n",
+ iter->dir->i_ino, len, (unsigned long long)iter->pos);
+ return -EFSCORRUPTED;
+ }
+ if (iter->pos + len > iter->dir->i_size) {
+ udf_err(iter->dir->i_sb,
+ "directory (ino %lu) has entry past directory size at pos %llu\n",
+ iter->dir->i_ino, (unsigned long long)iter->pos);
+ return -EFSCORRUPTED;
+ }
+ if (udf_dir_entry_len(&iter->fi) !=
+ sizeof(struct tag) + le16_to_cpu(iter->fi.descTag.descCRCLength)) {
+ udf_err(iter->dir->i_sb,
+ "directory (ino %lu) has entry where CRC length (%u) does not match entry length (%u)\n",
+ iter->dir->i_ino,
+ (unsigned)le16_to_cpu(iter->fi.descTag.descCRCLength),
+ (unsigned)(udf_dir_entry_len(&iter->fi) -
+ sizeof(struct tag)));
+ return -EFSCORRUPTED;
+ }
+ return 0;
+}
+static int udf_copy_fi(struct udf_fileident_iter *iter)
+{
+ struct udf_inode_info *iinfo = UDF_I(iter->dir);
+ u32 blksize = 1 << iter->dir->i_blkbits;
+ u32 off, len, nameoff;
+ int err;
+
+ /* Skip copying when we are at EOF */
+ if (iter->pos >= iter->dir->i_size) {
+ iter->name = NULL;
+ return 0;
+ }
+ if (iter->dir->i_size < iter->pos + sizeof(struct fileIdentDesc)) {
+ udf_err(iter->dir->i_sb,
+ "directory (ino %lu) has entry straddling EOF\n",
+ iter->dir->i_ino);
+ return -EFSCORRUPTED;
+ }
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
- fi = udf_get_fileident(iinfo->i_data -
- (iinfo->i_efe ?
- sizeof(struct extendedFileEntry) :
- sizeof(struct fileEntry)),
- dir->i_sb->s_blocksize,
- &(fibh->eoffset));
- if (!fi)
- return NULL;
-
- *nf_pos += fibh->eoffset - fibh->soffset;
-
- memcpy((uint8_t *)cfi, (uint8_t *)fi,
+ memcpy(&iter->fi, iinfo->i_data + iinfo->i_lenEAttr + iter->pos,
sizeof(struct fileIdentDesc));
-
- return fi;
+ err = udf_verify_fi(iter);
+ if (err < 0)
+ return err;
+ iter->name = iinfo->i_data + iinfo->i_lenEAttr + iter->pos +
+ sizeof(struct fileIdentDesc) +
+ le16_to_cpu(iter->fi.lengthOfImpUse);
+ return 0;
}
- if (fibh->eoffset == dir->i_sb->s_blocksize) {
- uint32_t lextoffset = epos->offset;
- unsigned char blocksize_bits = dir->i_sb->s_blocksize_bits;
-
- if (udf_next_aext(dir, epos, eloc, elen, 1) !=
- (EXT_RECORDED_ALLOCATED >> 30))
- return NULL;
+ off = iter->pos & (blksize - 1);
+ len = min_t(int, sizeof(struct fileIdentDesc), blksize - off);
+ memcpy(&iter->fi, iter->bh[0]->b_data + off, len);
+ if (len < sizeof(struct fileIdentDesc))
+ memcpy((char *)(&iter->fi) + len, iter->bh[1]->b_data,
+ sizeof(struct fileIdentDesc) - len);
+ err = udf_verify_fi(iter);
+ if (err < 0)
+ return err;
+
+ /* Handle directory entry name */
+ nameoff = off + sizeof(struct fileIdentDesc) +
+ le16_to_cpu(iter->fi.lengthOfImpUse);
+ if (off + udf_dir_entry_len(&iter->fi) <= blksize) {
+ iter->name = iter->bh[0]->b_data + nameoff;
+ } else if (nameoff >= blksize) {
+ iter->name = iter->bh[1]->b_data + (nameoff - blksize);
+ } else {
+ iter->name = iter->namebuf;
+ len = blksize - nameoff;
+ memcpy(iter->name, iter->bh[0]->b_data + nameoff, len);
+ memcpy(iter->name + len, iter->bh[1]->b_data,
+ iter->fi.lengthFileIdent - len);
+ }
+ return 0;
+}
- block = udf_get_lb_pblock(dir->i_sb, eloc, *offset);
+/* Readahead 8k once we are at 8k boundary */
+static void udf_readahead_dir(struct udf_fileident_iter *iter)
+{
+ unsigned int ralen = 16 >> (iter->dir->i_blkbits - 9);
+ struct buffer_head *tmp, *bha[16];
+ int i, num;
+ udf_pblk_t blk;
+
+ if (iter->loffset & (ralen - 1))
+ return;
+
+ if (iter->loffset + ralen > (iter->elen >> iter->dir->i_blkbits))
+ ralen = (iter->elen >> iter->dir->i_blkbits) - iter->loffset;
+ num = 0;
+ for (i = 0; i < ralen; i++) {
+ blk = udf_get_lb_pblock(iter->dir->i_sb, &iter->eloc,
+ iter->loffset + i);
+ tmp = sb_getblk(iter->dir->i_sb, blk);
+ if (tmp && !buffer_uptodate(tmp) && !buffer_locked(tmp))
+ bha[num++] = tmp;
+ else
+ brelse(tmp);
+ }
+ if (num) {
+ bh_readahead_batch(num, bha, REQ_RAHEAD);
+ for (i = 0; i < num; i++)
+ brelse(bha[i]);
+ }
+}
- (*offset)++;
+static struct buffer_head *udf_fiiter_bread_blk(struct udf_fileident_iter *iter)
+{
+ udf_pblk_t blk;
- if ((*offset << blocksize_bits) >= *elen)
- *offset = 0;
- else
- epos->offset = lextoffset;
+ udf_readahead_dir(iter);
+ blk = udf_get_lb_pblock(iter->dir->i_sb, &iter->eloc, iter->loffset);
+ return sb_bread(iter->dir->i_sb, blk);
+}
- brelse(fibh->sbh);
- fibh->sbh = fibh->ebh = udf_tread(dir->i_sb, block);
- if (!fibh->sbh)
- return NULL;
- fibh->soffset = fibh->eoffset = 0;
-
- if (!(*offset & ((16 >> (blocksize_bits - 9)) - 1))) {
- i = 16 >> (blocksize_bits - 9);
- if (i + *offset > (*elen >> blocksize_bits))
- i = (*elen >> blocksize_bits)-*offset;
- for (num = 0; i > 0; i--) {
- block = udf_get_lb_pblock(dir->i_sb, eloc,
- *offset + i);
- tmp = udf_tgetblk(dir->i_sb, block);
- if (tmp && !buffer_uptodate(tmp) &&
- !buffer_locked(tmp))
- bha[num++] = tmp;
- else
- brelse(tmp);
- }
- if (num) {
- bh_readahead_batch(num, bha, REQ_RAHEAD);
- for (i = 0; i < num; i++)
- brelse(bha[i]);
- }
+/*
+ * Updates loffset to point to next directory block; eloc, elen & epos are
+ * updated if we need to traverse to the next extent as well.
+ */
+static int udf_fiiter_advance_blk(struct udf_fileident_iter *iter)
+{
+ iter->loffset++;
+ if (iter->loffset < DIV_ROUND_UP(iter->elen, 1<<iter->dir->i_blkbits))
+ return 0;
+
+ iter->loffset = 0;
+ if (udf_next_aext(iter->dir, &iter->epos, &iter->eloc, &iter->elen, 1)
+ != (EXT_RECORDED_ALLOCATED >> 30)) {
+ if (iter->pos == iter->dir->i_size) {
+ iter->elen = 0;
+ return 0;
}
- } else if (fibh->sbh != fibh->ebh) {
- brelse(fibh->sbh);
- fibh->sbh = fibh->ebh;
+ udf_err(iter->dir->i_sb,
+ "extent after position %llu not allocated in directory (ino %lu)\n",
+ (unsigned long long)iter->pos, iter->dir->i_ino);
+ return -EFSCORRUPTED;
}
+ return 0;
+}
- fi = udf_get_fileident(fibh->sbh->b_data, dir->i_sb->s_blocksize,
- &(fibh->eoffset));
-
- if (!fi)
- return NULL;
+static int udf_fiiter_load_bhs(struct udf_fileident_iter *iter)
+{
+ int blksize = 1 << iter->dir->i_blkbits;
+ int off = iter->pos & (blksize - 1);
+ int err;
+ struct fileIdentDesc *fi;
- *nf_pos += fibh->eoffset - fibh->soffset;
+ /* Is there any further extent we can map from? */
+ if (!iter->bh[0] && iter->elen) {
+ iter->bh[0] = udf_fiiter_bread_blk(iter);
+ if (!iter->bh[0]) {
+ err = -ENOMEM;
+ goto out_brelse;
+ }
+ if (!buffer_uptodate(iter->bh[0])) {
+ err = -EIO;
+ goto out_brelse;
+ }
+ }
+ /* There's no next block so we are done */
+ if (iter->pos >= iter->dir->i_size)
+ return 0;
+ /* Need to fetch next block as well? */
+ if (off + sizeof(struct fileIdentDesc) > blksize)
+ goto fetch_next;
+ fi = (struct fileIdentDesc *)(iter->bh[0]->b_data + off);
+ /* Need to fetch next block to get name? */
+ if (off + udf_dir_entry_len(fi) > blksize) {
+fetch_next:
+ err = udf_fiiter_advance_blk(iter);
+ if (err)
+ goto out_brelse;
+ iter->bh[1] = udf_fiiter_bread_blk(iter);
+ if (!iter->bh[1]) {
+ err = -ENOMEM;
+ goto out_brelse;
+ }
+ if (!buffer_uptodate(iter->bh[1])) {
+ err = -EIO;
+ goto out_brelse;
+ }
+ }
+ return 0;
+out_brelse:
+ brelse(iter->bh[0]);
+ brelse(iter->bh[1]);
+ iter->bh[0] = iter->bh[1] = NULL;
+ return err;
+}
- if (fibh->eoffset <= dir->i_sb->s_blocksize) {
- memcpy((uint8_t *)cfi, (uint8_t *)fi,
- sizeof(struct fileIdentDesc));
- } else if (fibh->eoffset > dir->i_sb->s_blocksize) {
- uint32_t lextoffset = epos->offset;
+int udf_fiiter_init(struct udf_fileident_iter *iter, struct inode *dir,
+ loff_t pos)
+{
+ struct udf_inode_info *iinfo = UDF_I(dir);
+ int err = 0;
+
+ iter->dir = dir;
+ iter->bh[0] = iter->bh[1] = NULL;
+ iter->pos = pos;
+ iter->elen = 0;
+ iter->epos.bh = NULL;
+ iter->name = NULL;
+ /*
+ * When directory is verified, we don't expect directory iteration to
+ * fail and it can be difficult to undo without corrupting filesystem.
+ * So just do not allow memory allocation failures here.
+ */
+ iter->namebuf = kmalloc(UDF_NAME_LEN_CS0, GFP_KERNEL | __GFP_NOFAIL);
- if (udf_next_aext(dir, epos, eloc, elen, 1) !=
- (EXT_RECORDED_ALLOCATED >> 30))
- return NULL;
+ if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
+ err = udf_copy_fi(iter);
+ goto out;
+ }
- block = udf_get_lb_pblock(dir->i_sb, eloc, *offset);
+ if (inode_bmap(dir, iter->pos >> dir->i_blkbits, &iter->epos,
+ &iter->eloc, &iter->elen, &iter->loffset) !=
+ (EXT_RECORDED_ALLOCATED >> 30)) {
+ if (pos == dir->i_size)
+ return 0;
+ udf_err(dir->i_sb,
+ "position %llu not allocated in directory (ino %lu)\n",
+ (unsigned long long)pos, dir->i_ino);
+ err = -EFSCORRUPTED;
+ goto out;
+ }
+ err = udf_fiiter_load_bhs(iter);
+ if (err < 0)
+ goto out;
+ err = udf_copy_fi(iter);
+out:
+ if (err < 0)
+ udf_fiiter_release(iter);
+ return err;
+}
- (*offset)++;
+int udf_fiiter_advance(struct udf_fileident_iter *iter)
+{
+ unsigned int oldoff, len;
+ int blksize = 1 << iter->dir->i_blkbits;
+ int err;
+
+ oldoff = iter->pos & (blksize - 1);
+ len = udf_dir_entry_len(&iter->fi);
+ iter->pos += len;
+ if (UDF_I(iter->dir)->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
+ if (oldoff + len >= blksize) {
+ brelse(iter->bh[0]);
+ iter->bh[0] = NULL;
+ /* Next block already loaded? */
+ if (iter->bh[1]) {
+ iter->bh[0] = iter->bh[1];
+ iter->bh[1] = NULL;
+ } else {
+ err = udf_fiiter_advance_blk(iter);
+ if (err < 0)
+ return err;
+ }
+ }
+ err = udf_fiiter_load_bhs(iter);
+ if (err < 0)
+ return err;
+ }
+ return udf_copy_fi(iter);
+}
- if ((*offset << dir->i_sb->s_blocksize_bits) >= *elen)
- *offset = 0;
- else
- epos->offset = lextoffset;
+void udf_fiiter_release(struct udf_fileident_iter *iter)
+{
+ iter->dir = NULL;
+ brelse(iter->bh[0]);
+ brelse(iter->bh[1]);
+ iter->bh[0] = iter->bh[1] = NULL;
+ kfree(iter->namebuf);
+ iter->namebuf = NULL;
+}
- fibh->soffset -= dir->i_sb->s_blocksize;
- fibh->eoffset -= dir->i_sb->s_blocksize;
+static void udf_copy_to_bufs(void *buf1, int len1, void *buf2, int len2,
+ int off, void *src, int len)
+{
+ int copy;
+
+ if (off >= len1) {
+ off -= len1;
+ } else {
+ copy = min(off + len, len1) - off;
+ memcpy(buf1 + off, src, copy);
+ src += copy;
+ len -= copy;
+ off = 0;
+ }
+ if (len > 0) {
+ if (WARN_ON_ONCE(off + len > len2 || !buf2))
+ return;
+ memcpy(buf2 + off, src, len);
+ }
+}
- fibh->ebh = udf_tread(dir->i_sb, block);
- if (!fibh->ebh)
- return NULL;
+static uint16_t udf_crc_fi_bufs(void *buf1, int len1, void *buf2, int len2,
+ int off, int len)
+{
+ int copy;
+ uint16_t crc = 0;
+
+ if (off >= len1) {
+ off -= len1;
+ } else {
+ copy = min(off + len, len1) - off;
+ crc = crc_itu_t(crc, buf1 + off, copy);
+ len -= copy;
+ off = 0;
+ }
+ if (len > 0) {
+ if (WARN_ON_ONCE(off + len > len2 || !buf2))
+ return 0;
+ crc = crc_itu_t(crc, buf2 + off, len);
+ }
+ return crc;
+}
- if (sizeof(struct fileIdentDesc) > -fibh->soffset) {
- int fi_len;
+static void udf_copy_fi_to_bufs(char *buf1, int len1, char *buf2, int len2,
+ int off, struct fileIdentDesc *fi,
+ uint8_t *impuse, uint8_t *name)
+{
+ uint16_t crc;
+ int fioff = off;
+ int crcoff = off + sizeof(struct tag);
+ unsigned int crclen = udf_dir_entry_len(fi) - sizeof(struct tag);
+ char zeros[UDF_NAME_PAD] = {};
+ int endoff = off + udf_dir_entry_len(fi);
+
+ udf_copy_to_bufs(buf1, len1, buf2, len2, off, fi,
+ sizeof(struct fileIdentDesc));
+ off += sizeof(struct fileIdentDesc);
+ if (impuse)
+ udf_copy_to_bufs(buf1, len1, buf2, len2, off, impuse,
+ le16_to_cpu(fi->lengthOfImpUse));
+ off += le16_to_cpu(fi->lengthOfImpUse);
+ if (name) {
+ udf_copy_to_bufs(buf1, len1, buf2, len2, off, name,
+ fi->lengthFileIdent);
+ off += fi->lengthFileIdent;
+ udf_copy_to_bufs(buf1, len1, buf2, len2, off, zeros,
+ endoff - off);
+ }
- memcpy((uint8_t *)cfi, (uint8_t *)fi, -fibh->soffset);
- memcpy((uint8_t *)cfi - fibh->soffset,
- fibh->ebh->b_data,
- sizeof(struct fileIdentDesc) + fibh->soffset);
+ crc = udf_crc_fi_bufs(buf1, len1, buf2, len2, crcoff, crclen);
+ fi->descTag.descCRC = cpu_to_le16(crc);
+ fi->descTag.descCRCLength = cpu_to_le16(crclen);
+ fi->descTag.tagChecksum = udf_tag_checksum(&fi->descTag);
- fi_len = udf_dir_entry_len(cfi);
- *nf_pos += fi_len - (fibh->eoffset - fibh->soffset);
- fibh->eoffset = fibh->soffset + fi_len;
- } else {
- memcpy((uint8_t *)cfi, (uint8_t *)fi,
- sizeof(struct fileIdentDesc));
- }
- }
- /* Got last entry outside of dir size - fs is corrupted! */
- if (*nf_pos > dir->i_size)
- return NULL;
- return fi;
+ udf_copy_to_bufs(buf1, len1, buf2, len2, fioff, fi, sizeof(struct tag));
}
-struct fileIdentDesc *udf_get_fileident(void *buffer, int bufsize, int *offset)
+void udf_fiiter_write_fi(struct udf_fileident_iter *iter, uint8_t *impuse)
{
- struct fileIdentDesc *fi;
- int lengthThisIdent;
- uint8_t *ptr;
- int padlen;
+ struct udf_inode_info *iinfo = UDF_I(iter->dir);
+ void *buf1, *buf2 = NULL;
+ int len1, len2 = 0, off;
+ int blksize = 1 << iter->dir->i_blkbits;
- if ((!buffer) || (!offset)) {
- udf_debug("invalidparms, buffer=%p, offset=%p\n",
- buffer, offset);
- return NULL;
+ off = iter->pos & (blksize - 1);
+ if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
+ buf1 = iinfo->i_data + iinfo->i_lenEAttr;
+ len1 = iter->dir->i_size;
+ } else {
+ buf1 = iter->bh[0]->b_data;
+ len1 = blksize;
+ if (iter->bh[1]) {
+ buf2 = iter->bh[1]->b_data;
+ len2 = blksize;
+ }
}
- ptr = buffer;
+ udf_copy_fi_to_bufs(buf1, len1, buf2, len2, off, &iter->fi, impuse,
+ iter->name == iter->namebuf ? iter->name : NULL);
- if ((*offset > 0) && (*offset < bufsize))
- ptr += *offset;
- fi = (struct fileIdentDesc *)ptr;
- if (fi->descTag.tagIdent != cpu_to_le16(TAG_IDENT_FID)) {
- udf_debug("0x%x != TAG_IDENT_FID\n",
- le16_to_cpu(fi->descTag.tagIdent));
- udf_debug("offset: %d sizeof: %lu bufsize: %d\n",
- *offset, (unsigned long)sizeof(struct fileIdentDesc),
- bufsize);
- return NULL;
+ if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
+ mark_inode_dirty(iter->dir);
+ } else {
+ mark_buffer_dirty_inode(iter->bh[0], iter->dir);
+ if (iter->bh[1])
+ mark_buffer_dirty_inode(iter->bh[1], iter->dir);
}
- if ((*offset + sizeof(struct fileIdentDesc)) > bufsize)
- lengthThisIdent = sizeof(struct fileIdentDesc);
- else
- lengthThisIdent = sizeof(struct fileIdentDesc) +
- fi->lengthFileIdent + le16_to_cpu(fi->lengthOfImpUse);
+ inode_inc_iversion(iter->dir);
+}
- /* we need to figure padding, too! */
- padlen = lengthThisIdent % UDF_NAME_PAD;
- if (padlen)
- lengthThisIdent += (UDF_NAME_PAD - padlen);
- *offset = *offset + lengthThisIdent;
+void udf_fiiter_update_elen(struct udf_fileident_iter *iter, uint32_t new_elen)
+{
+ struct udf_inode_info *iinfo = UDF_I(iter->dir);
+ int diff = new_elen - iter->elen;
+
+ /* Skip update when we already went past the last extent */
+ if (!iter->elen)
+ return;
+ iter->elen = new_elen;
+ if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
+ iter->epos.offset -= sizeof(struct short_ad);
+ else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
+ iter->epos.offset -= sizeof(struct long_ad);
+ udf_write_aext(iter->dir, &iter->epos, &iter->eloc, iter->elen, 1);
+ iinfo->i_lenExtents += diff;
+ mark_inode_dirty(iter->dir);
+}
- return fi;
+/* Append new block to directory. @iter is expected to point at EOF */
+int udf_fiiter_append_blk(struct udf_fileident_iter *iter)
+{
+ struct udf_inode_info *iinfo = UDF_I(iter->dir);
+ int blksize = 1 << iter->dir->i_blkbits;
+ struct buffer_head *bh;
+ sector_t block;
+ uint32_t old_elen = iter->elen;
+ int err;
+
+ if (WARN_ON_ONCE(iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB))
+ return -EINVAL;
+
+ /* Round up last extent in the file */
+ udf_fiiter_update_elen(iter, ALIGN(iter->elen, blksize));
+
+ /* Allocate new block and refresh mapping information */
+ block = iinfo->i_lenExtents >> iter->dir->i_blkbits;
+ bh = udf_bread(iter->dir, block, 1, &err);
+ if (!bh) {
+ udf_fiiter_update_elen(iter, old_elen);
+ return err;
+ }
+ if (inode_bmap(iter->dir, block, &iter->epos, &iter->eloc, &iter->elen,
+ &iter->loffset) != (EXT_RECORDED_ALLOCATED >> 30)) {
+ udf_err(iter->dir->i_sb,
+ "block %llu not allocated in directory (ino %lu)\n",
+ (unsigned long long)block, iter->dir->i_ino);
+ return -EFSCORRUPTED;
+ }
+ if (!(iter->pos & (blksize - 1))) {
+ brelse(iter->bh[0]);
+ iter->bh[0] = bh;
+ } else {
+ iter->bh[1] = bh;
+ }
+ return 0;
}
struct short_ad *udf_get_fileshortad(uint8_t *ptr, int maxoffset, uint32_t *offset,
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 5c659e23e578..8238f742377b 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -38,100 +38,55 @@
#include "udf_i.h"
#include "udf_sb.h"
-static void __udf_adinicb_readpage(struct page *page)
+static vm_fault_t udf_page_mkwrite(struct vm_fault *vmf)
{
- struct inode *inode = page->mapping->host;
- char *kaddr;
- struct udf_inode_info *iinfo = UDF_I(inode);
- loff_t isize = i_size_read(inode);
-
- /*
- * We have to be careful here as truncate can change i_size under us.
- * So just sample it once and use the same value everywhere.
- */
- kaddr = kmap_atomic(page);
- memcpy(kaddr, iinfo->i_data + iinfo->i_lenEAttr, isize);
- memset(kaddr + isize, 0, PAGE_SIZE - isize);
- flush_dcache_page(page);
- SetPageUptodate(page);
- kunmap_atomic(kaddr);
-}
-
-static int udf_adinicb_read_folio(struct file *file, struct folio *folio)
-{
- BUG_ON(!folio_test_locked(folio));
- __udf_adinicb_readpage(&folio->page);
- folio_unlock(folio);
-
- return 0;
-}
-
-static int udf_adinicb_writepage(struct page *page,
- struct writeback_control *wbc)
-{
- struct inode *inode = page->mapping->host;
- char *kaddr;
- struct udf_inode_info *iinfo = UDF_I(inode);
-
- BUG_ON(!PageLocked(page));
-
- kaddr = kmap_atomic(page);
- memcpy(iinfo->i_data + iinfo->i_lenEAttr, kaddr, i_size_read(inode));
- SetPageUptodate(page);
- kunmap_atomic(kaddr);
- mark_inode_dirty(inode);
- unlock_page(page);
-
- return 0;
-}
-
-static int udf_adinicb_write_begin(struct file *file,
- struct address_space *mapping, loff_t pos,
- unsigned len, struct page **pagep,
- void **fsdata)
-{
- struct page *page;
-
- if (WARN_ON_ONCE(pos >= PAGE_SIZE))
- return -EIO;
- page = grab_cache_page_write_begin(mapping, 0);
- if (!page)
- return -ENOMEM;
- *pagep = page;
-
- if (!PageUptodate(page))
- __udf_adinicb_readpage(page);
- return 0;
-}
-
-static ssize_t udf_adinicb_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
-{
- /* Fallback to buffered I/O. */
- return 0;
-}
+ struct vm_area_struct *vma = vmf->vma;
+ struct inode *inode = file_inode(vma->vm_file);
+ struct address_space *mapping = inode->i_mapping;
+ struct page *page = vmf->page;
+ loff_t size;
+ unsigned int end;
+ vm_fault_t ret = VM_FAULT_LOCKED;
+ int err;
-static int udf_adinicb_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
-{
- struct inode *inode = page->mapping->host;
- loff_t last_pos = pos + copied;
- if (last_pos > inode->i_size)
- i_size_write(inode, last_pos);
+ sb_start_pagefault(inode->i_sb);
+ file_update_time(vma->vm_file);
+ filemap_invalidate_lock_shared(mapping);
+ lock_page(page);
+ size = i_size_read(inode);
+ if (page->mapping != inode->i_mapping || page_offset(page) >= size) {
+ unlock_page(page);
+ ret = VM_FAULT_NOPAGE;
+ goto out_unlock;
+ }
+ /* Space is already allocated for in-ICB file */
+ if (UDF_I(inode)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
+ goto out_dirty;
+ if (page->index == size >> PAGE_SHIFT)
+ end = size & ~PAGE_MASK;
+ else
+ end = PAGE_SIZE;
+ err = __block_write_begin(page, 0, end, udf_get_block);
+ if (!err)
+ err = block_commit_write(page, 0, end);
+ if (err < 0) {
+ unlock_page(page);
+ ret = block_page_mkwrite_return(err);
+ goto out_unlock;
+ }
+out_dirty:
set_page_dirty(page);
- unlock_page(page);
- put_page(page);
- return copied;
+ wait_for_stable_page(page);
+out_unlock:
+ filemap_invalidate_unlock_shared(mapping);
+ sb_end_pagefault(inode->i_sb);
+ return ret;
}
-const struct address_space_operations udf_adinicb_aops = {
- .dirty_folio = block_dirty_folio,
- .invalidate_folio = block_invalidate_folio,
- .read_folio = udf_adinicb_read_folio,
- .writepage = udf_adinicb_writepage,
- .write_begin = udf_adinicb_write_begin,
- .write_end = udf_adinicb_write_end,
- .direct_IO = udf_adinicb_direct_IO,
+static const struct vm_operations_struct udf_file_vm_ops = {
+ .fault = filemap_fault,
+ .map_pages = filemap_map_pages,
+ .page_mkwrite = udf_page_mkwrite,
};
static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
@@ -140,7 +95,6 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
struct udf_inode_info *iinfo = UDF_I(inode);
- int err;
inode_lock(inode);
@@ -148,27 +102,23 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (retval <= 0)
goto out;
- down_write(&iinfo->i_data_sem);
- if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
- loff_t end = iocb->ki_pos + iov_iter_count(from);
-
- if (inode->i_sb->s_blocksize <
- (udf_file_entry_alloc_offset(inode) + end)) {
- err = udf_expand_file_adinicb(inode);
- if (err) {
- inode_unlock(inode);
- udf_debug("udf_expand_adinicb: err=%d\n", err);
- return err;
- }
- } else {
- iinfo->i_lenAlloc = max(end, inode->i_size);
- up_write(&iinfo->i_data_sem);
- }
- } else
- up_write(&iinfo->i_data_sem);
+ if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB &&
+ inode->i_sb->s_blocksize < (udf_file_entry_alloc_offset(inode) +
+ iocb->ki_pos + iov_iter_count(from))) {
+ filemap_invalidate_lock(inode->i_mapping);
+ retval = udf_expand_file_adinicb(inode);
+ filemap_invalidate_unlock(inode->i_mapping);
+ if (retval)
+ goto out;
+ }
retval = __generic_file_write_iter(iocb, from);
out:
+ if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB && retval > 0) {
+ down_write(&iinfo->i_data_sem);
+ iinfo->i_lenAlloc = inode->i_size;
+ up_write(&iinfo->i_data_sem);
+ }
inode_unlock(inode);
if (retval > 0) {
@@ -243,11 +193,19 @@ static int udf_release_file(struct inode *inode, struct file *filp)
return 0;
}
+static int udf_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ file_accessed(file);
+ vma->vm_ops = &udf_file_vm_ops;
+
+ return 0;
+}
+
const struct file_operations udf_file_operations = {
.read_iter = generic_file_read_iter,
.unlocked_ioctl = udf_ioctl,
.open = generic_file_open,
- .mmap = generic_file_mmap,
+ .mmap = udf_file_mmap,
.write_iter = udf_file_write_iter,
.release = udf_release_file,
.fsync = generic_file_fsync,
@@ -256,14 +214,14 @@ const struct file_operations udf_file_operations = {
.llseek = generic_file_llseek,
};
-static int udf_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+static int udf_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
struct super_block *sb = inode->i_sb;
int error;
- error = setattr_prepare(&init_user_ns, dentry, attr);
+ error = setattr_prepare(&nop_mnt_idmap, dentry, attr);
if (error)
return error;
@@ -286,7 +244,7 @@ static int udf_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
if (attr->ia_valid & ATTR_MODE)
udf_update_extra_perms(inode, attr->ia_mode);
- setattr_copy(&init_user_ns, inode, attr);
+ setattr_copy(&nop_mnt_idmap, inode, attr);
mark_inode_dirty(inode);
return 0;
}
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index b5d611cee749..8d50121778a5 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -28,21 +28,7 @@
void udf_free_inode(struct inode *inode)
{
- struct super_block *sb = inode->i_sb;
- struct udf_sb_info *sbi = UDF_SB(sb);
- struct logicalVolIntegrityDescImpUse *lvidiu = udf_sb_lvidiu(sb);
-
- if (lvidiu) {
- mutex_lock(&sbi->s_alloc_mutex);
- if (S_ISDIR(inode->i_mode))
- le32_add_cpu(&lvidiu->numDirs, -1);
- else
- le32_add_cpu(&lvidiu->numFiles, -1);
- udf_updated_lvid(sb);
- mutex_unlock(&sbi->s_alloc_mutex);
- }
-
- udf_free_blocks(sb, NULL, &UDF_I(inode)->i_location, 0, 1);
+ udf_free_blocks(inode->i_sb, NULL, &UDF_I(inode)->i_location, 0, 1);
}
struct inode *udf_new_inode(struct inode *dir, umode_t mode)
@@ -54,7 +40,6 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode)
uint32_t start = UDF_I(dir)->i_location.logicalBlockNum;
struct udf_inode_info *iinfo;
struct udf_inode_info *dinfo = UDF_I(dir);
- struct logicalVolIntegrityDescImpUse *lvidiu;
int err;
inode = new_inode(sb);
@@ -92,20 +77,10 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode)
return ERR_PTR(err);
}
- lvidiu = udf_sb_lvidiu(sb);
- if (lvidiu) {
- iinfo->i_unique = lvid_get_unique_id(sb);
- inode->i_generation = iinfo->i_unique;
- mutex_lock(&sbi->s_alloc_mutex);
- if (S_ISDIR(mode))
- le32_add_cpu(&lvidiu->numDirs, 1);
- else
- le32_add_cpu(&lvidiu->numFiles, 1);
- udf_updated_lvid(sb);
- mutex_unlock(&sbi->s_alloc_mutex);
- }
+ iinfo->i_unique = lvid_get_unique_id(sb);
+ inode->i_generation = iinfo->i_unique;
- inode_init_owner(&init_user_ns, inode, dir, mode);
+ inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
if (UDF_QUERY_FLAG(sb, UDF_FLAG_UID_SET))
inode->i_uid = sbi->s_uid;
if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_SET))
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 1d7c2a812fc1..3b2adf4cbc57 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -52,21 +52,24 @@
#define FE_DELETE_PERMS (FE_PERM_U_DELETE | FE_PERM_G_DELETE | \
FE_PERM_O_DELETE)
+struct udf_map_rq;
+
static umode_t udf_convert_permissions(struct fileEntry *);
static int udf_update_inode(struct inode *, int);
static int udf_sync_inode(struct inode *inode);
static int udf_alloc_i_data(struct inode *inode, size_t size);
-static sector_t inode_getblk(struct inode *, sector_t, int *, int *);
-static int8_t udf_insert_aext(struct inode *, struct extent_position,
- struct kernel_lb_addr, uint32_t);
+static int inode_getblk(struct inode *inode, struct udf_map_rq *map);
+static int udf_insert_aext(struct inode *, struct extent_position,
+ struct kernel_lb_addr, uint32_t);
static void udf_split_extents(struct inode *, int *, int, udf_pblk_t,
struct kernel_long_ad *, int *);
static void udf_prealloc_extents(struct inode *, int, int,
struct kernel_long_ad *, int *);
static void udf_merge_extents(struct inode *, struct kernel_long_ad *, int *);
-static void udf_update_extents(struct inode *, struct kernel_long_ad *, int,
- int, struct extent_position *);
-static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int);
+static int udf_update_extents(struct inode *, struct kernel_long_ad *, int,
+ int, struct extent_position *);
+static int udf_get_block_wb(struct inode *inode, sector_t block,
+ struct buffer_head *bh_result, int create);
static void __udf_clear_extent_cache(struct inode *inode)
{
@@ -182,14 +185,56 @@ static void udf_write_failed(struct address_space *mapping, loff_t to)
}
}
+static int udf_adinicb_writepage(struct page *page,
+ struct writeback_control *wbc, void *data)
+{
+ struct inode *inode = page->mapping->host;
+ struct udf_inode_info *iinfo = UDF_I(inode);
+
+ BUG_ON(!PageLocked(page));
+ memcpy_to_page(page, 0, iinfo->i_data + iinfo->i_lenEAttr,
+ i_size_read(inode));
+ unlock_page(page);
+ mark_inode_dirty(inode);
+
+ return 0;
+}
+
static int udf_writepages(struct address_space *mapping,
- struct writeback_control *wbc)
+ struct writeback_control *wbc)
{
- return mpage_writepages(mapping, wbc, udf_get_block);
+ struct inode *inode = mapping->host;
+ struct udf_inode_info *iinfo = UDF_I(inode);
+
+ if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB)
+ return mpage_writepages(mapping, wbc, udf_get_block_wb);
+ return write_cache_pages(mapping, wbc, udf_adinicb_writepage, NULL);
+}
+
+static void udf_adinicb_readpage(struct page *page)
+{
+ struct inode *inode = page->mapping->host;
+ char *kaddr;
+ struct udf_inode_info *iinfo = UDF_I(inode);
+ loff_t isize = i_size_read(inode);
+
+ kaddr = kmap_local_page(page);
+ memcpy(kaddr, iinfo->i_data + iinfo->i_lenEAttr, isize);
+ memset(kaddr + isize, 0, PAGE_SIZE - isize);
+ flush_dcache_page(page);
+ SetPageUptodate(page);
+ kunmap_local(kaddr);
}
static int udf_read_folio(struct file *file, struct folio *folio)
{
+ struct udf_inode_info *iinfo = UDF_I(file_inode(file));
+
+ if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
+ udf_adinicb_readpage(&folio->page);
+ folio_unlock(folio);
+ return 0;
+ }
return mpage_read_folio(folio, udf_get_block);
}
@@ -199,15 +244,49 @@ static void udf_readahead(struct readahead_control *rac)
}
static int udf_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ loff_t pos, unsigned len,
+ struct page **pagep, void **fsdata)
{
+ struct udf_inode_info *iinfo = UDF_I(file_inode(file));
+ struct page *page;
int ret;
- ret = block_write_begin(mapping, pos, len, pagep, udf_get_block);
- if (unlikely(ret))
- udf_write_failed(mapping, pos + len);
- return ret;
+ if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
+ ret = block_write_begin(mapping, pos, len, pagep,
+ udf_get_block);
+ if (unlikely(ret))
+ udf_write_failed(mapping, pos + len);
+ return ret;
+ }
+ if (WARN_ON_ONCE(pos >= PAGE_SIZE))
+ return -EIO;
+ page = grab_cache_page_write_begin(mapping, 0);
+ if (!page)
+ return -ENOMEM;
+ *pagep = page;
+ if (!PageUptodate(page))
+ udf_adinicb_readpage(page);
+ return 0;
+}
+
+static int udf_write_end(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata)
+{
+ struct inode *inode = file_inode(file);
+ loff_t last_pos;
+
+ if (UDF_I(inode)->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB)
+ return generic_write_end(file, mapping, pos, len, copied, page,
+ fsdata);
+ last_pos = pos + copied;
+ if (last_pos > inode->i_size)
+ i_size_write(inode, last_pos);
+ set_page_dirty(page);
+ unlock_page(page);
+ put_page(page);
+
+ return copied;
}
static ssize_t udf_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
@@ -218,6 +297,9 @@ static ssize_t udf_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
size_t count = iov_iter_count(iter);
ssize_t ret;
+ /* Fallback to buffered IO for in-ICB files */
+ if (UDF_I(inode)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
+ return 0;
ret = blockdev_direct_IO(iocb, inode, iter, udf_get_block);
if (unlikely(ret < 0 && iov_iter_rw(iter) == WRITE))
udf_write_failed(mapping, iocb->ki_pos + count);
@@ -226,6 +308,10 @@ static ssize_t udf_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
static sector_t udf_bmap(struct address_space *mapping, sector_t block)
{
+ struct udf_inode_info *iinfo = UDF_I(mapping->host);
+
+ if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
+ return -EINVAL;
return generic_block_bmap(mapping, block, udf_get_block);
}
@@ -236,7 +322,7 @@ const struct address_space_operations udf_aops = {
.readahead = udf_readahead,
.writepages = udf_writepages,
.write_begin = udf_write_begin,
- .write_end = generic_write_end,
+ .write_end = udf_write_end,
.direct_IO = udf_direct_IO,
.bmap = udf_bmap,
.migrate_folio = buffer_migrate_folio,
@@ -245,18 +331,17 @@ const struct address_space_operations udf_aops = {
/*
* Expand file stored in ICB to a normal one-block-file
*
- * This function requires i_data_sem for writing and releases it.
* This function requires i_mutex held
*/
int udf_expand_file_adinicb(struct inode *inode)
{
struct page *page;
- char *kaddr;
struct udf_inode_info *iinfo = UDF_I(inode);
int err;
WARN_ON_ONCE(!inode_is_locked(inode));
if (!iinfo->i_lenAlloc) {
+ down_write(&iinfo->i_data_sem);
if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD))
iinfo->i_alloc_type = ICBTAG_FLAG_AD_SHORT;
else
@@ -267,26 +352,13 @@ int udf_expand_file_adinicb(struct inode *inode)
mark_inode_dirty(inode);
return 0;
}
- /*
- * Release i_data_sem so that we can lock a page - page lock ranks
- * above i_data_sem. i_mutex still protects us against file changes.
- */
- up_write(&iinfo->i_data_sem);
page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
if (!page)
return -ENOMEM;
- if (!PageUptodate(page)) {
- kaddr = kmap_atomic(page);
- memset(kaddr + iinfo->i_lenAlloc, 0x00,
- PAGE_SIZE - iinfo->i_lenAlloc);
- memcpy(kaddr, iinfo->i_data + iinfo->i_lenEAttr,
- iinfo->i_lenAlloc);
- flush_dcache_page(page);
- SetPageUptodate(page);
- kunmap_atomic(kaddr);
- }
+ if (!PageUptodate(page))
+ udf_adinicb_readpage(page);
down_write(&iinfo->i_data_sem);
memset(iinfo->i_data + iinfo->i_lenEAttr, 0x00,
iinfo->i_lenAlloc);
@@ -295,8 +367,6 @@ int udf_expand_file_adinicb(struct inode *inode)
iinfo->i_alloc_type = ICBTAG_FLAG_AD_SHORT;
else
iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG;
- /* from now on we have normal address_space methods */
- inode->i_data.a_ops = &udf_aops;
set_page_dirty(page);
unlock_page(page);
up_write(&iinfo->i_data_sem);
@@ -305,12 +375,10 @@ int udf_expand_file_adinicb(struct inode *inode)
/* Restore everything back so that we don't lose data... */
lock_page(page);
down_write(&iinfo->i_data_sem);
- kaddr = kmap_atomic(page);
- memcpy(iinfo->i_data + iinfo->i_lenEAttr, kaddr, inode->i_size);
- kunmap_atomic(kaddr);
+ memcpy_to_page(page, 0, iinfo->i_data + iinfo->i_lenEAttr,
+ inode->i_size);
unlock_page(page);
iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB;
- inode->i_data.a_ops = &udf_adinicb_aops;
iinfo->i_lenAlloc = inode->i_size;
up_write(&iinfo->i_data_sem);
}
@@ -320,162 +388,103 @@ int udf_expand_file_adinicb(struct inode *inode)
return err;
}
-struct buffer_head *udf_expand_dir_adinicb(struct inode *inode,
- udf_pblk_t *block, int *err)
-{
- udf_pblk_t newblock;
- struct buffer_head *dbh = NULL;
- struct kernel_lb_addr eloc;
- uint8_t alloctype;
- struct extent_position epos;
+#define UDF_MAP_CREATE 0x01 /* Mapping can allocate new blocks */
+#define UDF_MAP_NOPREALLOC 0x02 /* Do not preallocate blocks */
- struct udf_fileident_bh sfibh, dfibh;
- loff_t f_pos = udf_ext0_offset(inode);
- int size = udf_ext0_offset(inode) + inode->i_size;
- struct fileIdentDesc cfi, *sfi, *dfi;
- struct udf_inode_info *iinfo = UDF_I(inode);
+#define UDF_BLK_MAPPED 0x01 /* Block was successfully mapped */
+#define UDF_BLK_NEW 0x02 /* Block was freshly allocated */
- if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD))
- alloctype = ICBTAG_FLAG_AD_SHORT;
- else
- alloctype = ICBTAG_FLAG_AD_LONG;
+struct udf_map_rq {
+ sector_t lblk;
+ udf_pblk_t pblk;
+ int iflags; /* UDF_MAP_ flags determining behavior */
+ int oflags; /* UDF_BLK_ flags reporting results */
+};
- if (!inode->i_size) {
- iinfo->i_alloc_type = alloctype;
- mark_inode_dirty(inode);
- return NULL;
- }
+static int udf_map_block(struct inode *inode, struct udf_map_rq *map)
+{
+ int err;
+ struct udf_inode_info *iinfo = UDF_I(inode);
- /* alloc block, and copy data to it */
- *block = udf_new_block(inode->i_sb, inode,
- iinfo->i_location.partitionReferenceNum,
- iinfo->i_location.logicalBlockNum, err);
- if (!(*block))
- return NULL;
- newblock = udf_get_pblock(inode->i_sb, *block,
- iinfo->i_location.partitionReferenceNum,
- 0);
- if (!newblock)
- return NULL;
- dbh = udf_tgetblk(inode->i_sb, newblock);
- if (!dbh)
- return NULL;
- lock_buffer(dbh);
- memset(dbh->b_data, 0x00, inode->i_sb->s_blocksize);
- set_buffer_uptodate(dbh);
- unlock_buffer(dbh);
- mark_buffer_dirty_inode(dbh, inode);
-
- sfibh.soffset = sfibh.eoffset =
- f_pos & (inode->i_sb->s_blocksize - 1);
- sfibh.sbh = sfibh.ebh = NULL;
- dfibh.soffset = dfibh.eoffset = 0;
- dfibh.sbh = dfibh.ebh = dbh;
- while (f_pos < size) {
- iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB;
- sfi = udf_fileident_read(inode, &f_pos, &sfibh, &cfi, NULL,
- NULL, NULL, NULL);
- if (!sfi) {
- brelse(dbh);
- return NULL;
- }
- iinfo->i_alloc_type = alloctype;
- sfi->descTag.tagLocation = cpu_to_le32(*block);
- dfibh.soffset = dfibh.eoffset;
- dfibh.eoffset += (sfibh.eoffset - sfibh.soffset);
- dfi = (struct fileIdentDesc *)(dbh->b_data + dfibh.soffset);
- if (udf_write_fi(inode, sfi, dfi, &dfibh, sfi->impUse,
- udf_get_fi_ident(sfi))) {
- iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB;
- brelse(dbh);
- return NULL;
+ map->oflags = 0;
+ if (!(map->iflags & UDF_MAP_CREATE)) {
+ struct kernel_lb_addr eloc;
+ uint32_t elen;
+ sector_t offset;
+ struct extent_position epos = {};
+
+ down_read(&iinfo->i_data_sem);
+ if (inode_bmap(inode, map->lblk, &epos, &eloc, &elen, &offset)
+ == (EXT_RECORDED_ALLOCATED >> 30)) {
+ map->pblk = udf_get_lb_pblock(inode->i_sb, &eloc,
+ offset);
+ map->oflags |= UDF_BLK_MAPPED;
}
- }
- mark_buffer_dirty_inode(dbh, inode);
+ up_read(&iinfo->i_data_sem);
+ brelse(epos.bh);
- memset(iinfo->i_data + iinfo->i_lenEAttr, 0, iinfo->i_lenAlloc);
- iinfo->i_lenAlloc = 0;
- eloc.logicalBlockNum = *block;
- eloc.partitionReferenceNum =
- iinfo->i_location.partitionReferenceNum;
- iinfo->i_lenExtents = inode->i_size;
- epos.bh = NULL;
- epos.block = iinfo->i_location;
- epos.offset = udf_file_entry_alloc_offset(inode);
- udf_add_aext(inode, &epos, &eloc, inode->i_size, 0);
- /* UniqueID stuff */
-
- brelse(epos.bh);
- mark_inode_dirty(inode);
- return dbh;
-}
-
-static int udf_get_block(struct inode *inode, sector_t block,
- struct buffer_head *bh_result, int create)
-{
- int err, new;
- sector_t phys = 0;
- struct udf_inode_info *iinfo;
-
- if (!create) {
- phys = udf_block_map(inode, block);
- if (phys)
- map_bh(bh_result, inode->i_sb, phys);
return 0;
}
- err = -EIO;
- new = 0;
- iinfo = UDF_I(inode);
-
down_write(&iinfo->i_data_sem);
- if (block == iinfo->i_next_alloc_block + 1) {
- iinfo->i_next_alloc_block++;
- iinfo->i_next_alloc_goal++;
- }
-
/*
* Block beyond EOF and prealloc extents? Just discard preallocation
* as it is not useful and complicates things.
*/
- if (((loff_t)block) << inode->i_blkbits > iinfo->i_lenExtents)
+ if (((loff_t)map->lblk) << inode->i_blkbits >= iinfo->i_lenExtents)
udf_discard_prealloc(inode);
udf_clear_extent_cache(inode);
- phys = inode_getblk(inode, block, &err, &new);
- if (!phys)
- goto abort;
-
- if (new)
- set_buffer_new(bh_result);
- map_bh(bh_result, inode->i_sb, phys);
-
-abort:
+ err = inode_getblk(inode, map);
up_write(&iinfo->i_data_sem);
return err;
}
-static struct buffer_head *udf_getblk(struct inode *inode, udf_pblk_t block,
- int create, int *err)
+static int __udf_get_block(struct inode *inode, sector_t block,
+ struct buffer_head *bh_result, int flags)
{
- struct buffer_head *bh;
- struct buffer_head dummy;
-
- dummy.b_state = 0;
- dummy.b_blocknr = -1000;
- *err = udf_get_block(inode, block, &dummy, create);
- if (!*err && buffer_mapped(&dummy)) {
- bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
- if (buffer_new(&dummy)) {
- lock_buffer(bh);
- memset(bh->b_data, 0x00, inode->i_sb->s_blocksize);
- set_buffer_uptodate(bh);
- unlock_buffer(bh);
- mark_buffer_dirty_inode(bh, inode);
- }
- return bh;
+ int err;
+ struct udf_map_rq map = {
+ .lblk = block,
+ .iflags = flags,
+ };
+
+ err = udf_map_block(inode, &map);
+ if (err < 0)
+ return err;
+ if (map.oflags & UDF_BLK_MAPPED) {
+ map_bh(bh_result, inode->i_sb, map.pblk);
+ if (map.oflags & UDF_BLK_NEW)
+ set_buffer_new(bh_result);
}
+ return 0;
+}
- return NULL;
+int udf_get_block(struct inode *inode, sector_t block,
+ struct buffer_head *bh_result, int create)
+{
+ int flags = create ? UDF_MAP_CREATE : 0;
+
+ /*
+ * We preallocate blocks only for regular files. It also makes sense
+ * for directories but there's a problem when to drop the
+ * preallocation. We might use some delayed work for that but I feel
+ * it's overengineering for a filesystem like UDF.
+ */
+ if (!S_ISREG(inode->i_mode))
+ flags |= UDF_MAP_NOPREALLOC;
+ return __udf_get_block(inode, block, bh_result, flags);
+}
+
+/*
+ * We shouldn't be allocating blocks on page writeback since we allocate them
+ * on page fault. We can spot dirty buffers without allocated blocks though
+ * when truncate expands file. These however don't have valid data so we can
+ * safely ignore them. So never allocate blocks from page writeback.
+ */
+static int udf_get_block_wb(struct inode *inode, sector_t block,
+ struct buffer_head *bh_result, int create)
+{
+ return __udf_get_block(inode, block, bh_result, 0);
}
/* Extend the file with new blocks totaling 'new_block_bytes',
@@ -509,6 +518,7 @@ static int udf_do_extend_file(struct inode *inode,
~(sb->s_blocksize - 1);
}
+ add = 0;
/* Can we merge with the previous extent? */
if ((last_ext->extLength & UDF_EXTENT_FLAG_MASK) ==
EXT_NOT_RECORDED_NOT_ALLOCATED) {
@@ -521,8 +531,10 @@ static int udf_do_extend_file(struct inode *inode,
}
if (fake) {
- udf_add_aext(inode, last_pos, &last_ext->extLocation,
- last_ext->extLength, 1);
+ err = udf_add_aext(inode, last_pos, &last_ext->extLocation,
+ last_ext->extLength, 1);
+ if (err < 0)
+ goto out_err;
count++;
} else {
struct kernel_lb_addr tmploc;
@@ -539,6 +551,7 @@ static int udf_do_extend_file(struct inode *inode,
if (new_block_bytes)
udf_next_aext(inode, last_pos, &tmploc, &tmplen, 0);
}
+ iinfo->i_lenExtents += add;
/* Managed to do everything necessary? */
if (!new_block_bytes)
@@ -556,7 +569,8 @@ static int udf_do_extend_file(struct inode *inode,
err = udf_add_aext(inode, last_pos, &last_ext->extLocation,
last_ext->extLength, 1);
if (err)
- return err;
+ goto out_err;
+ iinfo->i_lenExtents += add;
count++;
}
if (new_block_bytes) {
@@ -565,7 +579,8 @@ static int udf_do_extend_file(struct inode *inode,
err = udf_add_aext(inode, last_pos, &last_ext->extLocation,
last_ext->extLength, 1);
if (err)
- return err;
+ goto out_err;
+ iinfo->i_lenExtents += new_block_bytes;
count++;
}
@@ -579,6 +594,11 @@ out:
return -EIO;
return count;
+out_err:
+ /* Remove extents we've created so far */
+ udf_clear_extent_cache(inode);
+ udf_truncate_extents(inode);
+ return err;
}
/* Extend the final block of the file to final_block_len bytes */
@@ -595,7 +615,7 @@ static void udf_do_extend_final_block(struct inode *inode,
*/
if (new_elen <= (last_ext->extLength & UDF_EXTENT_LENGTH_MASK))
return;
- added_bytes = (last_ext->extLength & UDF_EXTENT_LENGTH_MASK) - new_elen;
+ added_bytes = new_elen - (last_ext->extLength & UDF_EXTENT_LENGTH_MASK);
last_ext->extLength += added_bytes;
UDF_I(inode)->i_lenExtents += added_bytes;
@@ -626,6 +646,7 @@ static int udf_extend_file(struct inode *inode, loff_t newsize)
else
BUG();
+ down_write(&iinfo->i_data_sem);
/*
* When creating hole in file, just don't bother with preserving
* preallocation. It likely won't be very useful anyway.
@@ -668,14 +689,13 @@ static int udf_extend_file(struct inode *inode, loff_t newsize)
if (err < 0)
goto out;
err = 0;
- iinfo->i_lenExtents = newsize;
out:
brelse(epos.bh);
+ up_write(&iinfo->i_data_sem);
return err;
}
-static sector_t inode_getblk(struct inode *inode, sector_t block,
- int *err, int *new)
+static int inode_getblk(struct inode *inode, struct udf_map_rq *map)
{
struct kernel_long_ad laarr[EXTENT_MERGE_SIZE];
struct extent_position prev_epos, cur_epos, next_epos;
@@ -684,21 +704,20 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
struct kernel_lb_addr eloc, tmpeloc;
int c = 1;
loff_t lbcount = 0, b_off = 0;
- udf_pblk_t newblocknum, newblock;
+ udf_pblk_t newblocknum;
sector_t offset = 0;
int8_t etype;
struct udf_inode_info *iinfo = UDF_I(inode);
udf_pblk_t goal = 0, pgoal = iinfo->i_location.logicalBlockNum;
int lastblock = 0;
bool isBeyondEOF;
+ int ret = 0;
- *err = 0;
- *new = 0;
prev_epos.offset = udf_file_entry_alloc_offset(inode);
prev_epos.block = iinfo->i_location;
prev_epos.bh = NULL;
cur_epos = next_epos = prev_epos;
- b_off = (loff_t)block << inode->i_sb->s_blocksize_bits;
+ b_off = (loff_t)map->lblk << inode->i_sb->s_blocksize_bits;
/* find the extent which contains the block we are looking for.
alternate between laarr[0] and laarr[1] for locations of the
@@ -757,15 +776,18 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
elen = EXT_RECORDED_ALLOCATED |
((elen + inode->i_sb->s_blocksize - 1) &
~(inode->i_sb->s_blocksize - 1));
+ iinfo->i_lenExtents =
+ ALIGN(iinfo->i_lenExtents,
+ inode->i_sb->s_blocksize);
udf_write_aext(inode, &cur_epos, &eloc, elen, 1);
}
- newblock = udf_get_lb_pblock(inode->i_sb, &eloc, offset);
+ map->oflags = UDF_BLK_MAPPED;
+ map->pblk = udf_get_lb_pblock(inode->i_sb, &eloc, offset);
goto out_free;
}
/* Are we beyond EOF and preallocated extent? */
if (etype == -1) {
- int ret;
loff_t hole_len;
isBeyondEOF = true;
@@ -785,27 +807,22 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
/* Create extents for the hole between EOF and offset */
hole_len = (loff_t)offset << inode->i_blkbits;
ret = udf_do_extend_file(inode, &prev_epos, laarr, hole_len);
- if (ret < 0) {
- *err = ret;
- newblock = 0;
+ if (ret < 0)
goto out_free;
- }
c = 0;
offset = 0;
count += ret;
- /* We are not covered by a preallocated extent? */
- if ((laarr[0].extLength & UDF_EXTENT_FLAG_MASK) !=
- EXT_NOT_RECORDED_ALLOCATED) {
- /* Is there any real extent? - otherwise we overwrite
- * the fake one... */
- if (count)
- c = !c;
- laarr[c].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED |
- inode->i_sb->s_blocksize;
- memset(&laarr[c].extLocation, 0x00,
- sizeof(struct kernel_lb_addr));
- count++;
- }
+ /*
+ * Is there any real extent? - otherwise we overwrite the fake
+ * one...
+ */
+ if (count)
+ c = !c;
+ laarr[c].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED |
+ inode->i_sb->s_blocksize;
+ memset(&laarr[c].extLocation, 0x00,
+ sizeof(struct kernel_lb_addr));
+ count++;
endnum = c + 1;
lastblock = 1;
} else {
@@ -839,7 +856,7 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
if ((laarr[c].extLength >> 30) == (EXT_NOT_RECORDED_ALLOCATED >> 30))
newblocknum = laarr[c].extLocation.logicalBlockNum + offset;
else { /* otherwise, allocate a new block */
- if (iinfo->i_next_alloc_block == block)
+ if (iinfo->i_next_alloc_block == map->lblk)
goal = iinfo->i_next_alloc_goal;
if (!goal) {
@@ -849,12 +866,9 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
newblocknum = udf_new_block(inode->i_sb, inode,
iinfo->i_location.partitionReferenceNum,
- goal, err);
- if (!newblocknum) {
- *err = -ENOSPC;
- newblock = 0;
+ goal, &ret);
+ if (!newblocknum)
goto out_free;
- }
if (isBeyondEOF)
iinfo->i_lenExtents += inode->i_sb->s_blocksize;
}
@@ -865,11 +879,7 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
* block */
udf_split_extents(inode, &c, offset, newblocknum, laarr, &endnum);
- /* We preallocate blocks only for regular files. It also makes sense
- * for directories but there's a problem when to drop the
- * preallocation. We might use some delayed work for that but I feel
- * it's overengineering for a filesystem like UDF. */
- if (S_ISREG(inode->i_mode))
+ if (!(map->iflags & UDF_MAP_NOPREALLOC))
udf_prealloc_extents(inode, c, lastblock, laarr, &endnum);
/* merge any continuous blocks in laarr */
@@ -878,28 +888,31 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
/* write back the new extents, inserting new extents if the new number
* of extents is greater than the old number, and deleting extents if
* the new number of extents is less than the old number */
- udf_update_extents(inode, laarr, startnum, endnum, &prev_epos);
+ ret = udf_update_extents(inode, laarr, startnum, endnum, &prev_epos);
+ if (ret < 0)
+ goto out_free;
- newblock = udf_get_pblock(inode->i_sb, newblocknum,
+ map->pblk = udf_get_pblock(inode->i_sb, newblocknum,
iinfo->i_location.partitionReferenceNum, 0);
- if (!newblock) {
- *err = -EIO;
+ if (!map->pblk) {
+ ret = -EFSCORRUPTED;
goto out_free;
}
- *new = 1;
- iinfo->i_next_alloc_block = block;
- iinfo->i_next_alloc_goal = newblocknum;
+ map->oflags = UDF_BLK_NEW | UDF_BLK_MAPPED;
+ iinfo->i_next_alloc_block = map->lblk + 1;
+ iinfo->i_next_alloc_goal = newblocknum + 1;
inode->i_ctime = current_time(inode);
if (IS_SYNC(inode))
udf_sync_inode(inode);
else
mark_inode_dirty(inode);
+ ret = 0;
out_free:
brelse(prev_epos.bh);
brelse(cur_epos.bh);
brelse(next_epos.bh);
- return newblock;
+ return ret;
}
static void udf_split_extents(struct inode *inode, int *c, int offset,
@@ -1082,23 +1095,8 @@ static void udf_merge_extents(struct inode *inode, struct kernel_long_ad *laarr,
blocksize - 1) >> blocksize_bits)))) {
if (((li->extLength & UDF_EXTENT_LENGTH_MASK) +
- (lip1->extLength & UDF_EXTENT_LENGTH_MASK) +
- blocksize - 1) & ~UDF_EXTENT_LENGTH_MASK) {
- lip1->extLength = (lip1->extLength -
- (li->extLength &
- UDF_EXTENT_LENGTH_MASK) +
- UDF_EXTENT_LENGTH_MASK) &
- ~(blocksize - 1);
- li->extLength = (li->extLength &
- UDF_EXTENT_FLAG_MASK) +
- (UDF_EXTENT_LENGTH_MASK + 1) -
- blocksize;
- lip1->extLocation.logicalBlockNum =
- li->extLocation.logicalBlockNum +
- ((li->extLength &
- UDF_EXTENT_LENGTH_MASK) >>
- blocksize_bits);
- } else {
+ (lip1->extLength & UDF_EXTENT_LENGTH_MASK) +
+ blocksize - 1) <= UDF_EXTENT_LENGTH_MASK) {
li->extLength = lip1->extLength +
(((li->extLength &
UDF_EXTENT_LENGTH_MASK) +
@@ -1161,21 +1159,30 @@ static void udf_merge_extents(struct inode *inode, struct kernel_long_ad *laarr,
}
}
-static void udf_update_extents(struct inode *inode, struct kernel_long_ad *laarr,
- int startnum, int endnum,
- struct extent_position *epos)
+static int udf_update_extents(struct inode *inode, struct kernel_long_ad *laarr,
+ int startnum, int endnum,
+ struct extent_position *epos)
{
int start = 0, i;
struct kernel_lb_addr tmploc;
uint32_t tmplen;
+ int err;
if (startnum > endnum) {
for (i = 0; i < (startnum - endnum); i++)
udf_delete_aext(inode, *epos);
} else if (startnum < endnum) {
for (i = 0; i < (endnum - startnum); i++) {
- udf_insert_aext(inode, *epos, laarr[i].extLocation,
- laarr[i].extLength);
+ err = udf_insert_aext(inode, *epos,
+ laarr[i].extLocation,
+ laarr[i].extLength);
+ /*
+ * If we fail here, we are likely corrupting the extent
+ * list and leaking blocks. At least stop early to
+ * limit the damage.
+ */
+ if (err < 0)
+ return err;
udf_next_aext(inode, epos, &laarr[i].extLocation,
&laarr[i].extLength, 1);
start++;
@@ -1187,17 +1194,36 @@ static void udf_update_extents(struct inode *inode, struct kernel_long_ad *laarr
udf_write_aext(inode, epos, &laarr[i].extLocation,
laarr[i].extLength, 1);
}
+ return 0;
}
struct buffer_head *udf_bread(struct inode *inode, udf_pblk_t block,
int create, int *err)
{
struct buffer_head *bh = NULL;
+ struct udf_map_rq map = {
+ .lblk = block,
+ .iflags = UDF_MAP_NOPREALLOC | (create ? UDF_MAP_CREATE : 0),
+ };
- bh = udf_getblk(inode, block, create, err);
- if (!bh)
+ *err = udf_map_block(inode, &map);
+ if (*err || !(map.oflags & UDF_BLK_MAPPED))
return NULL;
+ bh = sb_getblk(inode->i_sb, map.pblk);
+ if (!bh) {
+ *err = -ENOMEM;
+ return NULL;
+ }
+ if (map.oflags & UDF_BLK_NEW) {
+ lock_buffer(bh);
+ memset(bh->b_data, 0x00, inode->i_sb->s_blocksize);
+ set_buffer_uptodate(bh);
+ unlock_buffer(bh);
+ mark_buffer_dirty_inode(bh, inode);
+ return bh;
+ }
+
if (bh_read(bh, 0) >= 0)
return bh;
@@ -1208,7 +1234,7 @@ struct buffer_head *udf_bread(struct inode *inode, udf_pblk_t block,
int udf_setsize(struct inode *inode, loff_t newsize)
{
- int err;
+ int err = 0;
struct udf_inode_info *iinfo;
unsigned int bsize = i_blocksize(inode);
@@ -1218,28 +1244,25 @@ int udf_setsize(struct inode *inode, loff_t newsize)
if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
return -EPERM;
+ filemap_invalidate_lock(inode->i_mapping);
iinfo = UDF_I(inode);
if (newsize > inode->i_size) {
- down_write(&iinfo->i_data_sem);
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
- if (bsize <
+ if (bsize >=
(udf_file_entry_alloc_offset(inode) + newsize)) {
- err = udf_expand_file_adinicb(inode);
- if (err)
- return err;
down_write(&iinfo->i_data_sem);
- } else {
iinfo->i_lenAlloc = newsize;
+ up_write(&iinfo->i_data_sem);
goto set_size;
}
+ err = udf_expand_file_adinicb(inode);
+ if (err)
+ goto out_unlock;
}
err = udf_extend_file(inode, newsize);
- if (err) {
- up_write(&iinfo->i_data_sem);
- return err;
- }
+ if (err)
+ goto out_unlock;
set_size:
- up_write(&iinfo->i_data_sem);
truncate_setsize(inode, newsize);
} else {
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
@@ -1256,14 +1279,14 @@ set_size:
err = block_truncate_page(inode->i_mapping, newsize,
udf_get_block);
if (err)
- return err;
+ goto out_unlock;
truncate_setsize(inode, newsize);
down_write(&iinfo->i_data_sem);
udf_clear_extent_cache(inode);
err = udf_truncate_extents(inode);
up_write(&iinfo->i_data_sem);
if (err)
- return err;
+ goto out_unlock;
}
update_time:
inode->i_mtime = inode->i_ctime = current_time(inode);
@@ -1271,7 +1294,9 @@ update_time:
udf_sync_inode(inode);
else
mark_inode_dirty(inode);
- return 0;
+out_unlock:
+ filemap_invalidate_unlock(inode->i_mapping);
+ return err;
}
/*
@@ -1383,6 +1408,7 @@ reread:
ret = -EIO;
goto out;
}
+ iinfo->i_hidden = hidden_inode;
iinfo->i_unique = 0;
iinfo->i_lenEAttr = 0;
iinfo->i_lenExtents = 0;
@@ -1539,10 +1565,7 @@ reread:
case ICBTAG_FILE_TYPE_REGULAR:
case ICBTAG_FILE_TYPE_UNDEF:
case ICBTAG_FILE_TYPE_VAT20:
- if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
- inode->i_data.a_ops = &udf_adinicb_aops;
- else
- inode->i_data.a_ops = &udf_aops;
+ inode->i_data.a_ops = &udf_aops;
inode->i_op = &udf_file_inode_operations;
inode->i_fop = &udf_file_operations;
inode->i_mode |= S_IFREG;
@@ -1673,7 +1696,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits;
struct udf_inode_info *iinfo = UDF_I(inode);
- bh = udf_tgetblk(inode->i_sb,
+ bh = sb_getblk(inode->i_sb,
udf_get_lb_pblock(inode->i_sb, &iinfo->i_location, 0));
if (!bh) {
udf_debug("getblk failure\n");
@@ -1718,8 +1741,12 @@ static int udf_update_inode(struct inode *inode, int do_sync)
if (S_ISDIR(inode->i_mode) && inode->i_nlink > 0)
fe->fileLinkCount = cpu_to_le16(inode->i_nlink - 1);
- else
- fe->fileLinkCount = cpu_to_le16(inode->i_nlink);
+ else {
+ if (iinfo->i_hidden)
+ fe->fileLinkCount = cpu_to_le16(0);
+ else
+ fe->fileLinkCount = cpu_to_le16(inode->i_nlink);
+ }
fe->informationLength = cpu_to_le64(inode->i_size);
@@ -1890,8 +1917,13 @@ struct inode *__udf_iget(struct super_block *sb, struct kernel_lb_addr *ino,
if (!inode)
return ERR_PTR(-ENOMEM);
- if (!(inode->i_state & I_NEW))
+ if (!(inode->i_state & I_NEW)) {
+ if (UDF_I(inode)->i_hidden != hidden_inode) {
+ iput(inode);
+ return ERR_PTR(-EFSCORRUPTED);
+ }
return inode;
+ }
memcpy(&UDF_I(inode)->i_location, ino, sizeof(struct kernel_lb_addr));
err = udf_read_inode(inode, hidden_inode);
@@ -1924,7 +1956,7 @@ int udf_setup_indirect_aext(struct inode *inode, udf_pblk_t block,
neloc.logicalBlockNum = block;
neloc.partitionReferenceNum = epos->block.partitionReferenceNum;
- bh = udf_tgetblk(sb, udf_get_lb_pblock(sb, &neloc, 0));
+ bh = sb_getblk(sb, udf_get_lb_pblock(sb, &neloc, 0));
if (!bh)
return -EIO;
lock_buffer(bh);
@@ -2141,7 +2173,7 @@ int8_t udf_next_aext(struct inode *inode, struct extent_position *epos,
epos->offset = sizeof(struct allocExtDesc);
brelse(epos->bh);
block = udf_get_lb_pblock(inode->i_sb, &epos->block, 0);
- epos->bh = udf_tread(inode->i_sb, block);
+ epos->bh = sb_bread(inode->i_sb, block);
if (!epos->bh) {
udf_debug("reading block %u failed!\n", block);
return -1;
@@ -2205,12 +2237,13 @@ int8_t udf_current_aext(struct inode *inode, struct extent_position *epos,
return etype;
}
-static int8_t udf_insert_aext(struct inode *inode, struct extent_position epos,
- struct kernel_lb_addr neloc, uint32_t nelen)
+static int udf_insert_aext(struct inode *inode, struct extent_position epos,
+ struct kernel_lb_addr neloc, uint32_t nelen)
{
struct kernel_lb_addr oeloc;
uint32_t oelen;
int8_t etype;
+ int err;
if (epos.bh)
get_bh(epos.bh);
@@ -2220,10 +2253,10 @@ static int8_t udf_insert_aext(struct inode *inode, struct extent_position epos,
neloc = oeloc;
nelen = (etype << 30) | oelen;
}
- udf_add_aext(inode, &epos, &neloc, nelen, 1);
+ err = udf_add_aext(inode, &epos, &neloc, nelen, 1);
brelse(epos.bh);
- return (nelen >> 30);
+ return err;
}
int8_t udf_delete_aext(struct inode *inode, struct extent_position epos)
@@ -2341,28 +2374,3 @@ int8_t inode_bmap(struct inode *inode, sector_t block,
return etype;
}
-
-udf_pblk_t udf_block_map(struct inode *inode, sector_t block)
-{
- struct kernel_lb_addr eloc;
- uint32_t elen;
- sector_t offset;
- struct extent_position epos = {};
- udf_pblk_t ret;
-
- down_read(&UDF_I(inode)->i_data_sem);
-
- if (inode_bmap(inode, block, &epos, &eloc, &elen, &offset) ==
- (EXT_RECORDED_ALLOCATED >> 30))
- ret = udf_get_lb_pblock(inode->i_sb, &eloc, offset);
- else
- ret = 0;
-
- up_read(&UDF_I(inode)->i_data_sem);
- brelse(epos.bh);
-
- if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_VARCONV))
- return udf_fixed_to_variable(ret);
- else
- return ret;
-}
diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c
index 46d697172197..c87ed942d076 100644
--- a/fs/udf/lowlevel.c
+++ b/fs/udf/lowlevel.c
@@ -45,7 +45,7 @@ unsigned int udf_get_last_session(struct super_block *sb)
return 0;
}
-unsigned long udf_get_last_block(struct super_block *sb)
+udf_pblk_t udf_get_last_block(struct super_block *sb)
{
struct cdrom_device_info *cdi = disk_to_cdi(sb->s_bdev->bd_disk);
unsigned long lblock = 0;
@@ -54,8 +54,11 @@ unsigned long udf_get_last_block(struct super_block *sb)
* The cdrom layer call failed or returned obviously bogus value?
* Try using the device size...
*/
- if (!cdi || cdrom_get_last_written(cdi, &lblock) || lblock == 0)
+ if (!cdi || cdrom_get_last_written(cdi, &lblock) || lblock == 0) {
+ if (sb_bdev_nr_blocks(sb) > ~(udf_pblk_t)0)
+ return 0;
lblock = sb_bdev_nr_blocks(sb);
+ }
if (lblock)
return lblock - 1;
diff --git a/fs/udf/misc.c b/fs/udf/misc.c
index 1614d308d0f0..3777468d06ce 100644
--- a/fs/udf/misc.c
+++ b/fs/udf/misc.c
@@ -28,22 +28,6 @@
#include "udf_i.h"
#include "udf_sb.h"
-struct buffer_head *udf_tgetblk(struct super_block *sb, udf_pblk_t block)
-{
- if (UDF_QUERY_FLAG(sb, UDF_FLAG_VARCONV))
- return sb_getblk(sb, udf_fixed_to_variable(block));
- else
- return sb_getblk(sb, block);
-}
-
-struct buffer_head *udf_tread(struct super_block *sb, udf_pblk_t block)
-{
- if (UDF_QUERY_FLAG(sb, UDF_FLAG_VARCONV))
- return sb_bread(sb, udf_fixed_to_variable(block));
- else
- return sb_bread(sb, block);
-}
-
struct genericFormat *udf_add_extendedattr(struct inode *inode, uint32_t size,
uint32_t type, uint8_t loc)
{
@@ -216,7 +200,7 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block,
if (block == 0xFFFFFFFF)
return NULL;
- bh = udf_tread(sb, block);
+ bh = sb_bread(sb, block);
if (!bh) {
udf_err(sb, "read failed, block=%u, location=%u\n",
block, location);
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 7c95c549dd64..fd20423d3ed2 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -41,283 +41,93 @@ static inline int udf_match(int len1, const unsigned char *name1, int len2,
return !memcmp(name1, name2, len1);
}
-int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi,
- struct fileIdentDesc *sfi, struct udf_fileident_bh *fibh,
- uint8_t *impuse, uint8_t *fileident)
-{
- uint16_t crclen = fibh->eoffset - fibh->soffset - sizeof(struct tag);
- uint16_t crc;
- int offset;
- uint16_t liu = le16_to_cpu(cfi->lengthOfImpUse);
- uint8_t lfi = cfi->lengthFileIdent;
- int padlen = fibh->eoffset - fibh->soffset - liu - lfi -
- sizeof(struct fileIdentDesc);
- int adinicb = 0;
-
- if (UDF_I(inode)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
- adinicb = 1;
-
- offset = fibh->soffset + sizeof(struct fileIdentDesc);
-
- if (impuse) {
- if (adinicb || (offset + liu < 0)) {
- memcpy((uint8_t *)sfi->impUse, impuse, liu);
- } else if (offset >= 0) {
- memcpy(fibh->ebh->b_data + offset, impuse, liu);
- } else {
- memcpy((uint8_t *)sfi->impUse, impuse, -offset);
- memcpy(fibh->ebh->b_data, impuse - offset,
- liu + offset);
- }
- }
-
- offset += liu;
-
- if (fileident) {
- if (adinicb || (offset + lfi < 0)) {
- memcpy(sfi->impUse + liu, fileident, lfi);
- } else if (offset >= 0) {
- memcpy(fibh->ebh->b_data + offset, fileident, lfi);
- } else {
- memcpy(sfi->impUse + liu, fileident, -offset);
- memcpy(fibh->ebh->b_data, fileident - offset,
- lfi + offset);
- }
- }
-
- offset += lfi;
-
- if (adinicb || (offset + padlen < 0)) {
- memset(sfi->impUse + liu + lfi, 0x00, padlen);
- } else if (offset >= 0) {
- memset(fibh->ebh->b_data + offset, 0x00, padlen);
- } else {
- memset(sfi->impUse + liu + lfi, 0x00, -offset);
- memset(fibh->ebh->b_data, 0x00, padlen + offset);
- }
-
- crc = crc_itu_t(0, (uint8_t *)cfi + sizeof(struct tag),
- sizeof(struct fileIdentDesc) - sizeof(struct tag));
-
- if (fibh->sbh == fibh->ebh) {
- crc = crc_itu_t(crc, (uint8_t *)sfi->impUse,
- crclen + sizeof(struct tag) -
- sizeof(struct fileIdentDesc));
- } else if (sizeof(struct fileIdentDesc) >= -fibh->soffset) {
- crc = crc_itu_t(crc, fibh->ebh->b_data +
- sizeof(struct fileIdentDesc) +
- fibh->soffset,
- crclen + sizeof(struct tag) -
- sizeof(struct fileIdentDesc));
- } else {
- crc = crc_itu_t(crc, (uint8_t *)sfi->impUse,
- -fibh->soffset - sizeof(struct fileIdentDesc));
- crc = crc_itu_t(crc, fibh->ebh->b_data, fibh->eoffset);
- }
-
- cfi->descTag.descCRC = cpu_to_le16(crc);
- cfi->descTag.descCRCLength = cpu_to_le16(crclen);
- cfi->descTag.tagChecksum = udf_tag_checksum(&cfi->descTag);
-
- if (adinicb || (sizeof(struct fileIdentDesc) <= -fibh->soffset)) {
- memcpy((uint8_t *)sfi, (uint8_t *)cfi,
- sizeof(struct fileIdentDesc));
- } else {
- memcpy((uint8_t *)sfi, (uint8_t *)cfi, -fibh->soffset);
- memcpy(fibh->ebh->b_data, (uint8_t *)cfi - fibh->soffset,
- sizeof(struct fileIdentDesc) + fibh->soffset);
- }
-
- if (adinicb) {
- mark_inode_dirty(inode);
- } else {
- if (fibh->sbh != fibh->ebh)
- mark_buffer_dirty_inode(fibh->ebh, inode);
- mark_buffer_dirty_inode(fibh->sbh, inode);
- }
- inode_inc_iversion(inode);
-
- return 0;
-}
-
/**
- * udf_find_entry - find entry in given directory.
+ * udf_fiiter_find_entry - find entry in given directory.
*
* @dir: directory inode to search in
* @child: qstr of the name
- * @fibh: buffer head / inode with file identifier descriptor we found
- * @cfi: found file identifier descriptor with given name
+ * @iter: iter to use for searching
*
* This function searches in the directory @dir for a file name @child. When
- * found, @fibh points to the buffer head(s) (bh is NULL for in ICB
- * directories) containing the file identifier descriptor (FID). In that case
- * the function returns pointer to the FID in the buffer or inode - but note
- * that FID may be split among two buffers (blocks) so accessing it via that
- * pointer isn't easily possible. This pointer can be used only as an iterator
- * for other directory manipulation functions. For inspection of the FID @cfi
- * can be used - the found FID is copied there.
+ * found, @iter points to the position in the directory with given entry.
*
- * Returns pointer to FID, NULL when nothing found, or error code.
+ * Returns 0 on success, < 0 on error (including -ENOENT).
*/
-static struct fileIdentDesc *udf_find_entry(struct inode *dir,
- const struct qstr *child,
- struct udf_fileident_bh *fibh,
- struct fileIdentDesc *cfi)
+static int udf_fiiter_find_entry(struct inode *dir, const struct qstr *child,
+ struct udf_fileident_iter *iter)
{
- struct fileIdentDesc *fi = NULL;
- loff_t f_pos;
- udf_pblk_t block;
int flen;
- unsigned char *fname = NULL, *copy_name = NULL;
- unsigned char *nameptr;
- uint8_t lfi;
- uint16_t liu;
- loff_t size;
- struct kernel_lb_addr eloc;
- uint32_t elen;
- sector_t offset;
- struct extent_position epos = {};
- struct udf_inode_info *dinfo = UDF_I(dir);
+ unsigned char *fname = NULL;
+ struct super_block *sb = dir->i_sb;
int isdotdot = child->len == 2 &&
child->name[0] == '.' && child->name[1] == '.';
- struct super_block *sb = dir->i_sb;
-
- size = udf_ext0_offset(dir) + dir->i_size;
- f_pos = udf_ext0_offset(dir);
-
- fibh->sbh = fibh->ebh = NULL;
- fibh->soffset = fibh->eoffset = f_pos & (sb->s_blocksize - 1);
- if (dinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
- if (inode_bmap(dir, f_pos >> sb->s_blocksize_bits, &epos,
- &eloc, &elen, &offset) != (EXT_RECORDED_ALLOCATED >> 30)) {
- fi = ERR_PTR(-EIO);
- goto out_err;
- }
-
- block = udf_get_lb_pblock(sb, &eloc, offset);
- if ((++offset << sb->s_blocksize_bits) < elen) {
- if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
- epos.offset -= sizeof(struct short_ad);
- else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
- epos.offset -= sizeof(struct long_ad);
- } else
- offset = 0;
-
- fibh->sbh = fibh->ebh = udf_tread(sb, block);
- if (!fibh->sbh) {
- fi = ERR_PTR(-EIO);
- goto out_err;
- }
- }
+ int ret;
fname = kmalloc(UDF_NAME_LEN, GFP_NOFS);
- if (!fname) {
- fi = ERR_PTR(-ENOMEM);
- goto out_err;
- }
-
- while (f_pos < size) {
- fi = udf_fileident_read(dir, &f_pos, fibh, cfi, &epos, &eloc,
- &elen, &offset);
- if (!fi) {
- fi = ERR_PTR(-EIO);
- goto out_err;
- }
-
- liu = le16_to_cpu(cfi->lengthOfImpUse);
- lfi = cfi->lengthFileIdent;
-
- if (fibh->sbh == fibh->ebh) {
- nameptr = udf_get_fi_ident(fi);
- } else {
- int poffset; /* Unpaded ending offset */
-
- poffset = fibh->soffset + sizeof(struct fileIdentDesc) +
- liu + lfi;
-
- if (poffset >= lfi)
- nameptr = (uint8_t *)(fibh->ebh->b_data +
- poffset - lfi);
- else {
- if (!copy_name) {
- copy_name = kmalloc(UDF_NAME_LEN_CS0,
- GFP_NOFS);
- if (!copy_name) {
- fi = ERR_PTR(-ENOMEM);
- goto out_err;
- }
- }
- nameptr = copy_name;
- memcpy(nameptr, udf_get_fi_ident(fi),
- lfi - poffset);
- memcpy(nameptr + lfi - poffset,
- fibh->ebh->b_data, poffset);
- }
- }
+ if (!fname)
+ return -ENOMEM;
- if ((cfi->fileCharacteristics & FID_FILE_CHAR_DELETED) != 0) {
+ for (ret = udf_fiiter_init(iter, dir, 0);
+ !ret && iter->pos < dir->i_size;
+ ret = udf_fiiter_advance(iter)) {
+ if (iter->fi.fileCharacteristics & FID_FILE_CHAR_DELETED) {
if (!UDF_QUERY_FLAG(sb, UDF_FLAG_UNDELETE))
continue;
}
- if ((cfi->fileCharacteristics & FID_FILE_CHAR_HIDDEN) != 0) {
+ if (iter->fi.fileCharacteristics & FID_FILE_CHAR_HIDDEN) {
if (!UDF_QUERY_FLAG(sb, UDF_FLAG_UNHIDE))
continue;
}
- if ((cfi->fileCharacteristics & FID_FILE_CHAR_PARENT) &&
+ if ((iter->fi.fileCharacteristics & FID_FILE_CHAR_PARENT) &&
isdotdot)
goto out_ok;
- if (!lfi)
+ if (!iter->fi.lengthFileIdent)
continue;
- flen = udf_get_filename(sb, nameptr, lfi, fname, UDF_NAME_LEN);
+ flen = udf_get_filename(sb, iter->name,
+ iter->fi.lengthFileIdent, fname, UDF_NAME_LEN);
if (flen < 0) {
- fi = ERR_PTR(flen);
+ ret = flen;
goto out_err;
}
if (udf_match(flen, fname, child->len, child->name))
goto out_ok;
}
+ if (!ret)
+ ret = -ENOENT;
- fi = NULL;
out_err:
- if (fibh->sbh != fibh->ebh)
- brelse(fibh->ebh);
- brelse(fibh->sbh);
+ udf_fiiter_release(iter);
out_ok:
- brelse(epos.bh);
kfree(fname);
- kfree(copy_name);
- return fi;
+ return ret;
}
static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
struct inode *inode = NULL;
- struct fileIdentDesc cfi;
- struct udf_fileident_bh fibh;
- struct fileIdentDesc *fi;
+ struct udf_fileident_iter iter;
+ int err;
if (dentry->d_name.len > UDF_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
- fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi);
- if (IS_ERR(fi))
- return ERR_CAST(fi);
+ err = udf_fiiter_find_entry(dir, &dentry->d_name, &iter);
+ if (err < 0 && err != -ENOENT)
+ return ERR_PTR(err);
- if (fi) {
+ if (err == 0) {
struct kernel_lb_addr loc;
- if (fibh.sbh != fibh.ebh)
- brelse(fibh.ebh);
- brelse(fibh.sbh);
+ loc = lelb_to_cpu(iter.fi.icb.extLocation);
+ udf_fiiter_release(&iter);
- loc = lelb_to_cpu(cfi.icb.extLocation);
inode = udf_iget(dir->i_sb, &loc);
if (IS_ERR(inode))
return ERR_CAST(inode);
@@ -326,287 +136,249 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
return d_splice_alias(inode, dentry);
}
-static struct fileIdentDesc *udf_add_entry(struct inode *dir,
- struct dentry *dentry,
- struct udf_fileident_bh *fibh,
- struct fileIdentDesc *cfi, int *err)
+static int udf_expand_dir_adinicb(struct inode *inode, udf_pblk_t *block)
{
- struct super_block *sb = dir->i_sb;
- struct fileIdentDesc *fi = NULL;
- unsigned char *name = NULL;
- int namelen;
- loff_t f_pos;
- loff_t size = udf_ext0_offset(dir) + dir->i_size;
- int nfidlen;
- udf_pblk_t block;
+ udf_pblk_t newblock;
+ struct buffer_head *dbh = NULL;
struct kernel_lb_addr eloc;
- uint32_t elen = 0;
- sector_t offset;
- struct extent_position epos = {};
- struct udf_inode_info *dinfo;
+ struct extent_position epos;
+ uint8_t alloctype;
+ struct udf_inode_info *iinfo = UDF_I(inode);
+ struct udf_fileident_iter iter;
+ uint8_t *impuse;
+ int ret;
- fibh->sbh = fibh->ebh = NULL;
- name = kmalloc(UDF_NAME_LEN_CS0, GFP_NOFS);
- if (!name) {
- *err = -ENOMEM;
- goto out_err;
- }
+ if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD))
+ alloctype = ICBTAG_FLAG_AD_SHORT;
+ else
+ alloctype = ICBTAG_FLAG_AD_LONG;
- if (dentry) {
- if (!dentry->d_name.len) {
- *err = -EINVAL;
- goto out_err;
- }
- namelen = udf_put_filename(sb, dentry->d_name.name,
- dentry->d_name.len,
- name, UDF_NAME_LEN_CS0);
- if (!namelen) {
- *err = -ENAMETOOLONG;
- goto out_err;
- }
- } else {
- namelen = 0;
+ if (!inode->i_size) {
+ iinfo->i_alloc_type = alloctype;
+ mark_inode_dirty(inode);
+ return 0;
}
- nfidlen = ALIGN(sizeof(struct fileIdentDesc) + namelen, UDF_NAME_PAD);
-
- f_pos = udf_ext0_offset(dir);
-
- fibh->soffset = fibh->eoffset = f_pos & (dir->i_sb->s_blocksize - 1);
- dinfo = UDF_I(dir);
- if (dinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
- if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits, &epos,
- &eloc, &elen, &offset) != (EXT_RECORDED_ALLOCATED >> 30)) {
- block = udf_get_lb_pblock(dir->i_sb,
- &dinfo->i_location, 0);
- fibh->soffset = fibh->eoffset = sb->s_blocksize;
- goto add;
- }
- block = udf_get_lb_pblock(dir->i_sb, &eloc, offset);
- if ((++offset << dir->i_sb->s_blocksize_bits) < elen) {
- if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
- epos.offset -= sizeof(struct short_ad);
- else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
- epos.offset -= sizeof(struct long_ad);
- } else
- offset = 0;
-
- fibh->sbh = fibh->ebh = udf_tread(dir->i_sb, block);
- if (!fibh->sbh) {
- *err = -EIO;
- goto out_err;
- }
+ /* alloc block, and copy data to it */
+ *block = udf_new_block(inode->i_sb, inode,
+ iinfo->i_location.partitionReferenceNum,
+ iinfo->i_location.logicalBlockNum, &ret);
+ if (!(*block))
+ return ret;
+ newblock = udf_get_pblock(inode->i_sb, *block,
+ iinfo->i_location.partitionReferenceNum,
+ 0);
+ if (newblock == 0xffffffff)
+ return -EFSCORRUPTED;
+ dbh = sb_getblk(inode->i_sb, newblock);
+ if (!dbh)
+ return -ENOMEM;
+ lock_buffer(dbh);
+ memcpy(dbh->b_data, iinfo->i_data, inode->i_size);
+ memset(dbh->b_data + inode->i_size, 0,
+ inode->i_sb->s_blocksize - inode->i_size);
+ set_buffer_uptodate(dbh);
+ unlock_buffer(dbh);
+
+ /* Drop inline data, add block instead */
+ iinfo->i_alloc_type = alloctype;
+ memset(iinfo->i_data + iinfo->i_lenEAttr, 0, iinfo->i_lenAlloc);
+ iinfo->i_lenAlloc = 0;
+ eloc.logicalBlockNum = *block;
+ eloc.partitionReferenceNum =
+ iinfo->i_location.partitionReferenceNum;
+ iinfo->i_lenExtents = inode->i_size;
+ epos.bh = NULL;
+ epos.block = iinfo->i_location;
+ epos.offset = udf_file_entry_alloc_offset(inode);
+ ret = udf_add_aext(inode, &epos, &eloc, inode->i_size, 0);
+ brelse(epos.bh);
+ if (ret < 0) {
+ brelse(dbh);
+ udf_free_blocks(inode->i_sb, inode, &eloc, 0, 1);
+ return ret;
+ }
+ mark_inode_dirty(inode);
- block = dinfo->i_location.logicalBlockNum;
+ /* Now fixup tags in moved directory entries */
+ for (ret = udf_fiiter_init(&iter, inode, 0);
+ !ret && iter.pos < inode->i_size;
+ ret = udf_fiiter_advance(&iter)) {
+ iter.fi.descTag.tagLocation = cpu_to_le32(*block);
+ if (iter.fi.lengthOfImpUse != cpu_to_le16(0))
+ impuse = dbh->b_data + iter.pos +
+ sizeof(struct fileIdentDesc);
+ else
+ impuse = NULL;
+ udf_fiiter_write_fi(&iter, impuse);
}
+ brelse(dbh);
+ /*
+ * We don't expect the iteration to fail as the directory has been
+ * already verified to be correct
+ */
+ WARN_ON_ONCE(ret);
+ udf_fiiter_release(&iter);
- while (f_pos < size) {
- fi = udf_fileident_read(dir, &f_pos, fibh, cfi, &epos, &eloc,
- &elen, &offset);
+ return 0;
+}
- if (!fi) {
- *err = -EIO;
- goto out_err;
- }
+static int udf_fiiter_add_entry(struct inode *dir, struct dentry *dentry,
+ struct udf_fileident_iter *iter)
+{
+ struct udf_inode_info *dinfo = UDF_I(dir);
+ int nfidlen, namelen = 0;
+ int ret;
+ int off, blksize = 1 << dir->i_blkbits;
+ udf_pblk_t block;
+ char name[UDF_NAME_LEN_CS0];
+
+ if (dentry) {
+ if (!dentry->d_name.len)
+ return -EINVAL;
+ namelen = udf_put_filename(dir->i_sb, dentry->d_name.name,
+ dentry->d_name.len,
+ name, UDF_NAME_LEN_CS0);
+ if (!namelen)
+ return -ENAMETOOLONG;
+ }
+ nfidlen = ALIGN(sizeof(struct fileIdentDesc) + namelen, UDF_NAME_PAD);
- if ((cfi->fileCharacteristics & FID_FILE_CHAR_DELETED) != 0) {
- if (udf_dir_entry_len(cfi) == nfidlen) {
- cfi->descTag.tagSerialNum = cpu_to_le16(1);
- cfi->fileVersionNum = cpu_to_le16(1);
- cfi->fileCharacteristics = 0;
- cfi->lengthFileIdent = namelen;
- cfi->lengthOfImpUse = cpu_to_le16(0);
- if (!udf_write_fi(dir, cfi, fi, fibh, NULL,
- name))
- goto out_ok;
- else {
- *err = -EIO;
- goto out_err;
- }
+ for (ret = udf_fiiter_init(iter, dir, 0);
+ !ret && iter->pos < dir->i_size;
+ ret = udf_fiiter_advance(iter)) {
+ if (iter->fi.fileCharacteristics & FID_FILE_CHAR_DELETED) {
+ if (udf_dir_entry_len(&iter->fi) == nfidlen) {
+ iter->fi.descTag.tagSerialNum = cpu_to_le16(1);
+ iter->fi.fileVersionNum = cpu_to_le16(1);
+ iter->fi.fileCharacteristics = 0;
+ iter->fi.lengthFileIdent = namelen;
+ iter->fi.lengthOfImpUse = cpu_to_le16(0);
+ memcpy(iter->namebuf, name, namelen);
+ iter->name = iter->namebuf;
+ return 0;
}
}
}
-
-add:
- f_pos += nfidlen;
-
+ if (ret) {
+ udf_fiiter_release(iter);
+ return ret;
+ }
if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB &&
- sb->s_blocksize - fibh->eoffset < nfidlen) {
- brelse(epos.bh);
- epos.bh = NULL;
- fibh->soffset -= udf_ext0_offset(dir);
- fibh->eoffset -= udf_ext0_offset(dir);
- f_pos -= udf_ext0_offset(dir);
- if (fibh->sbh != fibh->ebh)
- brelse(fibh->ebh);
- brelse(fibh->sbh);
- fibh->sbh = fibh->ebh =
- udf_expand_dir_adinicb(dir, &block, err);
- if (!fibh->sbh)
- goto out_err;
- epos.block = dinfo->i_location;
- epos.offset = udf_file_entry_alloc_offset(dir);
- /* Load extent udf_expand_dir_adinicb() has created */
- udf_current_aext(dir, &epos, &eloc, &elen, 1);
+ blksize - udf_ext0_offset(dir) - iter->pos < nfidlen) {
+ udf_fiiter_release(iter);
+ ret = udf_expand_dir_adinicb(dir, &block);
+ if (ret)
+ return ret;
+ ret = udf_fiiter_init(iter, dir, dir->i_size);
+ if (ret < 0)
+ return ret;
}
- /* Entry fits into current block? */
- if (sb->s_blocksize - fibh->eoffset >= nfidlen) {
- fibh->soffset = fibh->eoffset;
- fibh->eoffset += nfidlen;
- if (fibh->sbh != fibh->ebh) {
- brelse(fibh->sbh);
- fibh->sbh = fibh->ebh;
- }
-
- if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
- block = dinfo->i_location.logicalBlockNum;
- fi = (struct fileIdentDesc *)
- (dinfo->i_data + fibh->soffset -
- udf_ext0_offset(dir) +
- dinfo->i_lenEAttr);
- } else {
- block = eloc.logicalBlockNum +
- ((elen - 1) >>
- dir->i_sb->s_blocksize_bits);
- fi = (struct fileIdentDesc *)
- (fibh->sbh->b_data + fibh->soffset);
- }
+ /* Get blocknumber to use for entry tag */
+ if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
+ block = dinfo->i_location.logicalBlockNum;
} else {
- /* Round up last extent in the file */
- elen = (elen + sb->s_blocksize - 1) & ~(sb->s_blocksize - 1);
- if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
- epos.offset -= sizeof(struct short_ad);
- else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
- epos.offset -= sizeof(struct long_ad);
- udf_write_aext(dir, &epos, &eloc, elen, 1);
- dinfo->i_lenExtents = (dinfo->i_lenExtents + sb->s_blocksize
- - 1) & ~(sb->s_blocksize - 1);
-
- fibh->soffset = fibh->eoffset - sb->s_blocksize;
- fibh->eoffset += nfidlen - sb->s_blocksize;
- if (fibh->sbh != fibh->ebh) {
- brelse(fibh->sbh);
- fibh->sbh = fibh->ebh;
- }
+ block = iter->eloc.logicalBlockNum +
+ ((iter->elen - 1) >> dir->i_blkbits);
+ }
+ off = iter->pos & (blksize - 1);
+ if (!off)
+ off = blksize;
+ /* Entry fits into current block? */
+ if (blksize - udf_ext0_offset(dir) - off >= nfidlen)
+ goto store_fi;
- block = eloc.logicalBlockNum + ((elen - 1) >>
- dir->i_sb->s_blocksize_bits);
- fibh->ebh = udf_bread(dir,
- f_pos >> dir->i_sb->s_blocksize_bits, 1, err);
- if (!fibh->ebh)
- goto out_err;
- /* Extents could have been merged, invalidate our position */
- brelse(epos.bh);
- epos.bh = NULL;
- epos.block = dinfo->i_location;
- epos.offset = udf_file_entry_alloc_offset(dir);
-
- if (!fibh->soffset) {
- /* Find the freshly allocated block */
- while (udf_next_aext(dir, &epos, &eloc, &elen, 1) ==
- (EXT_RECORDED_ALLOCATED >> 30))
- ;
- block = eloc.logicalBlockNum + ((elen - 1) >>
- dir->i_sb->s_blocksize_bits);
- brelse(fibh->sbh);
- fibh->sbh = fibh->ebh;
- fi = (struct fileIdentDesc *)(fibh->sbh->b_data);
- } else {
- fi = (struct fileIdentDesc *)
- (fibh->sbh->b_data + sb->s_blocksize +
- fibh->soffset);
- }
+ ret = udf_fiiter_append_blk(iter);
+ if (ret) {
+ udf_fiiter_release(iter);
+ return ret;
}
- memset(cfi, 0, sizeof(struct fileIdentDesc));
- if (UDF_SB(sb)->s_udfrev >= 0x0200)
- udf_new_tag((char *)cfi, TAG_IDENT_FID, 3, 1, block,
+ /* Entry will be completely in the new block? Update tag location... */
+ if (!(iter->pos & (blksize - 1)))
+ block = iter->eloc.logicalBlockNum +
+ ((iter->elen - 1) >> dir->i_blkbits);
+store_fi:
+ memset(&iter->fi, 0, sizeof(struct fileIdentDesc));
+ if (UDF_SB(dir->i_sb)->s_udfrev >= 0x0200)
+ udf_new_tag((char *)(&iter->fi), TAG_IDENT_FID, 3, 1, block,
sizeof(struct tag));
else
- udf_new_tag((char *)cfi, TAG_IDENT_FID, 2, 1, block,
+ udf_new_tag((char *)(&iter->fi), TAG_IDENT_FID, 2, 1, block,
sizeof(struct tag));
- cfi->fileVersionNum = cpu_to_le16(1);
- cfi->lengthFileIdent = namelen;
- cfi->lengthOfImpUse = cpu_to_le16(0);
- if (!udf_write_fi(dir, cfi, fi, fibh, NULL, name)) {
- dir->i_size += nfidlen;
- if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
- dinfo->i_lenAlloc += nfidlen;
- else {
- /* Find the last extent and truncate it to proper size */
- while (udf_next_aext(dir, &epos, &eloc, &elen, 1) ==
- (EXT_RECORDED_ALLOCATED >> 30))
- ;
- elen -= dinfo->i_lenExtents - dir->i_size;
- if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
- epos.offset -= sizeof(struct short_ad);
- else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
- epos.offset -= sizeof(struct long_ad);
- udf_write_aext(dir, &epos, &eloc, elen, 1);
- dinfo->i_lenExtents = dir->i_size;
- }
-
- mark_inode_dirty(dir);
- goto out_ok;
+ iter->fi.fileVersionNum = cpu_to_le16(1);
+ iter->fi.lengthFileIdent = namelen;
+ iter->fi.lengthOfImpUse = cpu_to_le16(0);
+ memcpy(iter->namebuf, name, namelen);
+ iter->name = iter->namebuf;
+
+ dir->i_size += nfidlen;
+ if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
+ dinfo->i_lenAlloc += nfidlen;
} else {
- *err = -EIO;
- goto out_err;
+ /* Truncate last extent to proper size */
+ udf_fiiter_update_elen(iter, iter->elen -
+ (dinfo->i_lenExtents - dir->i_size));
}
+ mark_inode_dirty(dir);
-out_err:
- fi = NULL;
- if (fibh->sbh != fibh->ebh)
- brelse(fibh->ebh);
- brelse(fibh->sbh);
-out_ok:
- brelse(epos.bh);
- kfree(name);
- return fi;
+ return 0;
}
-static int udf_delete_entry(struct inode *inode, struct fileIdentDesc *fi,
- struct udf_fileident_bh *fibh,
- struct fileIdentDesc *cfi)
+static void udf_fiiter_delete_entry(struct udf_fileident_iter *iter)
{
- cfi->fileCharacteristics |= FID_FILE_CHAR_DELETED;
+ iter->fi.fileCharacteristics |= FID_FILE_CHAR_DELETED;
+
+ if (UDF_QUERY_FLAG(iter->dir->i_sb, UDF_FLAG_STRICT))
+ memset(&iter->fi.icb, 0x00, sizeof(struct long_ad));
+
+ udf_fiiter_write_fi(iter, NULL);
+}
- if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT))
- memset(&(cfi->icb), 0x00, sizeof(struct long_ad));
+static void udf_add_fid_counter(struct super_block *sb, bool dir, int val)
+{
+ struct logicalVolIntegrityDescImpUse *lvidiu = udf_sb_lvidiu(sb);
- return udf_write_fi(inode, cfi, fi, fibh, NULL, NULL);
+ if (!lvidiu)
+ return;
+ mutex_lock(&UDF_SB(sb)->s_alloc_mutex);
+ if (dir)
+ le32_add_cpu(&lvidiu->numDirs, val);
+ else
+ le32_add_cpu(&lvidiu->numFiles, val);
+ udf_updated_lvid(sb);
+ mutex_unlock(&UDF_SB(sb)->s_alloc_mutex);
}
static int udf_add_nondir(struct dentry *dentry, struct inode *inode)
{
struct udf_inode_info *iinfo = UDF_I(inode);
struct inode *dir = d_inode(dentry->d_parent);
- struct udf_fileident_bh fibh;
- struct fileIdentDesc cfi, *fi;
+ struct udf_fileident_iter iter;
int err;
- fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
- if (unlikely(!fi)) {
+ err = udf_fiiter_add_entry(dir, dentry, &iter);
+ if (err) {
inode_dec_link_count(inode);
discard_new_inode(inode);
return err;
}
- cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
- cfi.icb.extLocation = cpu_to_lelb(iinfo->i_location);
- *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse =
+ iter.fi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
+ iter.fi.icb.extLocation = cpu_to_lelb(iinfo->i_location);
+ *(__le32 *)((struct allocDescImpUse *)iter.fi.icb.impUse)->impUse =
cpu_to_le32(iinfo->i_unique & 0x00000000FFFFFFFFUL);
- udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL);
+ udf_fiiter_write_fi(&iter, NULL);
dir->i_ctime = dir->i_mtime = current_time(dir);
mark_inode_dirty(dir);
- if (fibh.sbh != fibh.ebh)
- brelse(fibh.ebh);
- brelse(fibh.sbh);
+ udf_fiiter_release(&iter);
+ udf_add_fid_counter(dir->i_sb, false, 1);
d_instantiate_new(dentry, inode);
return 0;
}
-static int udf_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int udf_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
struct inode *inode = udf_new_inode(dir, mode);
@@ -614,10 +386,7 @@ static int udf_create(struct user_namespace *mnt_userns, struct inode *dir,
if (IS_ERR(inode))
return PTR_ERR(inode);
- if (UDF_I(inode)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
- inode->i_data.a_ops = &udf_adinicb_aops;
- else
- inode->i_data.a_ops = &udf_aops;
+ inode->i_data.a_ops = &udf_aops;
inode->i_op = &udf_file_inode_operations;
inode->i_fop = &udf_file_operations;
mark_inode_dirty(inode);
@@ -625,7 +394,7 @@ static int udf_create(struct user_namespace *mnt_userns, struct inode *dir,
return udf_add_nondir(dentry, inode);
}
-static int udf_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+static int udf_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
struct file *file, umode_t mode)
{
struct inode *inode = udf_new_inode(dir, mode);
@@ -633,10 +402,7 @@ static int udf_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
if (IS_ERR(inode))
return PTR_ERR(inode);
- if (UDF_I(inode)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
- inode->i_data.a_ops = &udf_adinicb_aops;
- else
- inode->i_data.a_ops = &udf_aops;
+ inode->i_data.a_ops = &udf_aops;
inode->i_op = &udf_file_inode_operations;
inode->i_fop = &udf_file_operations;
mark_inode_dirty(inode);
@@ -645,7 +411,7 @@ static int udf_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
return finish_open_simple(file, 0);
}
-static int udf_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+static int udf_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct inode *inode;
@@ -661,12 +427,11 @@ static int udf_mknod(struct user_namespace *mnt_userns, struct inode *dir,
return udf_add_nondir(dentry, inode);
}
-static int udf_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int udf_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
struct inode *inode;
- struct udf_fileident_bh fibh;
- struct fileIdentDesc cfi, *fi;
+ struct udf_fileident_iter iter;
int err;
struct udf_inode_info *dinfo = UDF_I(dir);
struct udf_inode_info *iinfo;
@@ -678,183 +443,113 @@ static int udf_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
iinfo = UDF_I(inode);
inode->i_op = &udf_dir_inode_operations;
inode->i_fop = &udf_dir_operations;
- fi = udf_add_entry(inode, NULL, &fibh, &cfi, &err);
- if (!fi) {
- inode_dec_link_count(inode);
+ err = udf_fiiter_add_entry(inode, NULL, &iter);
+ if (err) {
+ clear_nlink(inode);
discard_new_inode(inode);
- goto out;
+ return err;
}
set_nlink(inode, 2);
- cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
- cfi.icb.extLocation = cpu_to_lelb(dinfo->i_location);
- *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse =
+ iter.fi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
+ iter.fi.icb.extLocation = cpu_to_lelb(dinfo->i_location);
+ *(__le32 *)((struct allocDescImpUse *)iter.fi.icb.impUse)->impUse =
cpu_to_le32(dinfo->i_unique & 0x00000000FFFFFFFFUL);
- cfi.fileCharacteristics =
+ iter.fi.fileCharacteristics =
FID_FILE_CHAR_DIRECTORY | FID_FILE_CHAR_PARENT;
- udf_write_fi(inode, &cfi, fi, &fibh, NULL, NULL);
- brelse(fibh.sbh);
+ udf_fiiter_write_fi(&iter, NULL);
+ udf_fiiter_release(&iter);
mark_inode_dirty(inode);
- fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
- if (!fi) {
+ err = udf_fiiter_add_entry(dir, dentry, &iter);
+ if (err) {
clear_nlink(inode);
- mark_inode_dirty(inode);
discard_new_inode(inode);
- goto out;
+ return err;
}
- cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
- cfi.icb.extLocation = cpu_to_lelb(iinfo->i_location);
- *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse =
+ iter.fi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
+ iter.fi.icb.extLocation = cpu_to_lelb(iinfo->i_location);
+ *(__le32 *)((struct allocDescImpUse *)iter.fi.icb.impUse)->impUse =
cpu_to_le32(iinfo->i_unique & 0x00000000FFFFFFFFUL);
- cfi.fileCharacteristics |= FID_FILE_CHAR_DIRECTORY;
- udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL);
+ iter.fi.fileCharacteristics |= FID_FILE_CHAR_DIRECTORY;
+ udf_fiiter_write_fi(&iter, NULL);
+ udf_fiiter_release(&iter);
+ udf_add_fid_counter(dir->i_sb, true, 1);
inc_nlink(dir);
dir->i_ctime = dir->i_mtime = current_time(dir);
mark_inode_dirty(dir);
d_instantiate_new(dentry, inode);
- if (fibh.sbh != fibh.ebh)
- brelse(fibh.ebh);
- brelse(fibh.sbh);
- err = 0;
-out:
- return err;
+ return 0;
}
static int empty_dir(struct inode *dir)
{
- struct fileIdentDesc *fi, cfi;
- struct udf_fileident_bh fibh;
- loff_t f_pos;
- loff_t size = udf_ext0_offset(dir) + dir->i_size;
- udf_pblk_t block;
- struct kernel_lb_addr eloc;
- uint32_t elen;
- sector_t offset;
- struct extent_position epos = {};
- struct udf_inode_info *dinfo = UDF_I(dir);
-
- f_pos = udf_ext0_offset(dir);
- fibh.soffset = fibh.eoffset = f_pos & (dir->i_sb->s_blocksize - 1);
-
- if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
- fibh.sbh = fibh.ebh = NULL;
- else if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits,
- &epos, &eloc, &elen, &offset) ==
- (EXT_RECORDED_ALLOCATED >> 30)) {
- block = udf_get_lb_pblock(dir->i_sb, &eloc, offset);
- if ((++offset << dir->i_sb->s_blocksize_bits) < elen) {
- if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
- epos.offset -= sizeof(struct short_ad);
- else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
- epos.offset -= sizeof(struct long_ad);
- } else
- offset = 0;
-
- fibh.sbh = fibh.ebh = udf_tread(dir->i_sb, block);
- if (!fibh.sbh) {
- brelse(epos.bh);
+ struct udf_fileident_iter iter;
+ int ret;
+
+ for (ret = udf_fiiter_init(&iter, dir, 0);
+ !ret && iter.pos < dir->i_size;
+ ret = udf_fiiter_advance(&iter)) {
+ if (iter.fi.lengthFileIdent &&
+ !(iter.fi.fileCharacteristics & FID_FILE_CHAR_DELETED)) {
+ udf_fiiter_release(&iter);
return 0;
}
- } else {
- brelse(epos.bh);
- return 0;
}
-
- while (f_pos < size) {
- fi = udf_fileident_read(dir, &f_pos, &fibh, &cfi, &epos, &eloc,
- &elen, &offset);
- if (!fi) {
- if (fibh.sbh != fibh.ebh)
- brelse(fibh.ebh);
- brelse(fibh.sbh);
- brelse(epos.bh);
- return 0;
- }
-
- if (cfi.lengthFileIdent &&
- (cfi.fileCharacteristics & FID_FILE_CHAR_DELETED) == 0) {
- if (fibh.sbh != fibh.ebh)
- brelse(fibh.ebh);
- brelse(fibh.sbh);
- brelse(epos.bh);
- return 0;
- }
- }
-
- if (fibh.sbh != fibh.ebh)
- brelse(fibh.ebh);
- brelse(fibh.sbh);
- brelse(epos.bh);
+ udf_fiiter_release(&iter);
return 1;
}
static int udf_rmdir(struct inode *dir, struct dentry *dentry)
{
- int retval;
+ int ret;
struct inode *inode = d_inode(dentry);
- struct udf_fileident_bh fibh;
- struct fileIdentDesc *fi, cfi;
+ struct udf_fileident_iter iter;
struct kernel_lb_addr tloc;
- retval = -ENOENT;
- fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi);
- if (IS_ERR_OR_NULL(fi)) {
- if (fi)
- retval = PTR_ERR(fi);
+ ret = udf_fiiter_find_entry(dir, &dentry->d_name, &iter);
+ if (ret)
goto out;
- }
- retval = -EIO;
- tloc = lelb_to_cpu(cfi.icb.extLocation);
+ ret = -EFSCORRUPTED;
+ tloc = lelb_to_cpu(iter.fi.icb.extLocation);
if (udf_get_lb_pblock(dir->i_sb, &tloc, 0) != inode->i_ino)
goto end_rmdir;
- retval = -ENOTEMPTY;
+ ret = -ENOTEMPTY;
if (!empty_dir(inode))
goto end_rmdir;
- retval = udf_delete_entry(dir, fi, &fibh, &cfi);
- if (retval)
- goto end_rmdir;
+ udf_fiiter_delete_entry(&iter);
if (inode->i_nlink != 2)
udf_warn(inode->i_sb, "empty directory has nlink != 2 (%u)\n",
inode->i_nlink);
clear_nlink(inode);
inode->i_size = 0;
inode_dec_link_count(dir);
+ udf_add_fid_counter(dir->i_sb, true, -1);
inode->i_ctime = dir->i_ctime = dir->i_mtime =
current_time(inode);
mark_inode_dirty(dir);
-
+ ret = 0;
end_rmdir:
- if (fibh.sbh != fibh.ebh)
- brelse(fibh.ebh);
- brelse(fibh.sbh);
-
+ udf_fiiter_release(&iter);
out:
- return retval;
+ return ret;
}
static int udf_unlink(struct inode *dir, struct dentry *dentry)
{
- int retval;
+ int ret;
struct inode *inode = d_inode(dentry);
- struct udf_fileident_bh fibh;
- struct fileIdentDesc *fi;
- struct fileIdentDesc cfi;
+ struct udf_fileident_iter iter;
struct kernel_lb_addr tloc;
- retval = -ENOENT;
- fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi);
-
- if (IS_ERR_OR_NULL(fi)) {
- if (fi)
- retval = PTR_ERR(fi);
+ ret = udf_fiiter_find_entry(dir, &dentry->d_name, &iter);
+ if (ret)
goto out;
- }
- retval = -EIO;
- tloc = lelb_to_cpu(cfi.icb.extLocation);
+ ret = -EFSCORRUPTED;
+ tloc = lelb_to_cpu(iter.fi.icb.extLocation);
if (udf_get_lb_pblock(dir->i_sb, &tloc, 0) != inode->i_ino)
goto end_unlink;
@@ -863,25 +558,20 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry)
inode->i_ino, inode->i_nlink);
set_nlink(inode, 1);
}
- retval = udf_delete_entry(dir, fi, &fibh, &cfi);
- if (retval)
- goto end_unlink;
+ udf_fiiter_delete_entry(&iter);
dir->i_ctime = dir->i_mtime = current_time(dir);
mark_inode_dirty(dir);
inode_dec_link_count(inode);
+ udf_add_fid_counter(dir->i_sb, false, -1);
inode->i_ctime = dir->i_ctime;
- retval = 0;
-
+ ret = 0;
end_unlink:
- if (fibh.sbh != fibh.ebh)
- brelse(fibh.ebh);
- brelse(fibh.sbh);
-
+ udf_fiiter_release(&iter);
out:
- return retval;
+ return ret;
}
-static int udf_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+static int udf_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *symname)
{
struct inode *inode = udf_new_inode(dir, S_IFLNK | 0777);
@@ -929,15 +619,20 @@ static int udf_symlink(struct user_namespace *mnt_userns, struct inode *dir,
iinfo->i_location.partitionReferenceNum;
bsize = sb->s_blocksize;
iinfo->i_lenExtents = bsize;
- udf_add_aext(inode, &epos, &eloc, bsize, 0);
+ err = udf_add_aext(inode, &epos, &eloc, bsize, 0);
brelse(epos.bh);
+ if (err < 0) {
+ udf_free_blocks(sb, inode, &eloc, 0, 1);
+ goto out_no_entry;
+ }
block = udf_get_pblock(sb, block,
iinfo->i_location.partitionReferenceNum,
0);
- epos.bh = udf_tgetblk(sb, block);
+ epos.bh = sb_getblk(sb, block);
if (unlikely(!epos.bh)) {
err = -ENOMEM;
+ udf_free_blocks(sb, inode, &eloc, 0, 1);
goto out_no_entry;
}
lock_buffer(epos.bh);
@@ -1038,28 +733,23 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
struct dentry *dentry)
{
struct inode *inode = d_inode(old_dentry);
- struct udf_fileident_bh fibh;
- struct fileIdentDesc cfi, *fi;
+ struct udf_fileident_iter iter;
int err;
- fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
- if (!fi) {
+ err = udf_fiiter_add_entry(dir, dentry, &iter);
+ if (err)
return err;
- }
- cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
- cfi.icb.extLocation = cpu_to_lelb(UDF_I(inode)->i_location);
+ iter.fi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
+ iter.fi.icb.extLocation = cpu_to_lelb(UDF_I(inode)->i_location);
if (UDF_SB(inode->i_sb)->s_lvid_bh) {
- *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse =
+ *(__le32 *)((struct allocDescImpUse *)iter.fi.icb.impUse)->impUse =
cpu_to_le32(lvid_get_unique_id(inode->i_sb));
}
- udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL);
- if (UDF_I(dir)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
- mark_inode_dirty(dir);
+ udf_fiiter_write_fi(&iter, NULL);
+ udf_fiiter_release(&iter);
- if (fibh.sbh != fibh.ebh)
- brelse(fibh.ebh);
- brelse(fibh.sbh);
inc_nlink(inode);
+ udf_add_fid_counter(dir->i_sb, false, 1);
inode->i_ctime = current_time(inode);
mark_inode_dirty(inode);
dir->i_ctime = dir->i_mtime = current_time(dir);
@@ -1073,84 +763,81 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
/* Anybody can rename anything with this: the permission checks are left to the
* higher-level routines.
*/
-static int udf_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+static int udf_rename(struct mnt_idmap *idmap, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags)
{
struct inode *old_inode = d_inode(old_dentry);
struct inode *new_inode = d_inode(new_dentry);
- struct udf_fileident_bh ofibh, nfibh;
- struct fileIdentDesc *ofi = NULL, *nfi = NULL, *dir_fi = NULL;
- struct fileIdentDesc ocfi, ncfi;
- struct buffer_head *dir_bh = NULL;
- int retval = -ENOENT;
+ struct udf_fileident_iter oiter, niter, diriter;
+ bool has_diriter = false;
+ int retval;
struct kernel_lb_addr tloc;
- struct udf_inode_info *old_iinfo = UDF_I(old_inode);
if (flags & ~RENAME_NOREPLACE)
return -EINVAL;
- ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi);
- if (!ofi || IS_ERR(ofi)) {
- if (IS_ERR(ofi))
- retval = PTR_ERR(ofi);
- goto end_rename;
- }
-
- if (ofibh.sbh != ofibh.ebh)
- brelse(ofibh.ebh);
-
- brelse(ofibh.sbh);
- tloc = lelb_to_cpu(ocfi.icb.extLocation);
- if (udf_get_lb_pblock(old_dir->i_sb, &tloc, 0) != old_inode->i_ino)
- goto end_rename;
+ retval = udf_fiiter_find_entry(old_dir, &old_dentry->d_name, &oiter);
+ if (retval)
+ return retval;
- nfi = udf_find_entry(new_dir, &new_dentry->d_name, &nfibh, &ncfi);
- if (IS_ERR(nfi)) {
- retval = PTR_ERR(nfi);
- goto end_rename;
- }
- if (nfi && !new_inode) {
- if (nfibh.sbh != nfibh.ebh)
- brelse(nfibh.ebh);
- brelse(nfibh.sbh);
- nfi = NULL;
+ tloc = lelb_to_cpu(oiter.fi.icb.extLocation);
+ if (udf_get_lb_pblock(old_dir->i_sb, &tloc, 0) != old_inode->i_ino) {
+ retval = -ENOENT;
+ goto out_oiter;
}
- if (S_ISDIR(old_inode->i_mode)) {
- int offset = udf_ext0_offset(old_inode);
+ if (S_ISDIR(old_inode->i_mode)) {
if (new_inode) {
retval = -ENOTEMPTY;
if (!empty_dir(new_inode))
- goto end_rename;
+ goto out_oiter;
+ }
+ /*
+ * We need to protect against old_inode getting converted from
+ * ICB to normal directory.
+ */
+ inode_lock_nested(old_inode, I_MUTEX_NONDIR2);
+ retval = udf_fiiter_find_entry(old_inode, &dotdot_name,
+ &diriter);
+ if (retval == -ENOENT) {
+ udf_err(old_inode->i_sb,
+ "directory (ino %lu) has no '..' entry\n",
+ old_inode->i_ino);
+ retval = -EFSCORRUPTED;
}
- retval = -EIO;
- if (old_iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
- dir_fi = udf_get_fileident(
- old_iinfo->i_data -
- (old_iinfo->i_efe ?
- sizeof(struct extendedFileEntry) :
- sizeof(struct fileEntry)),
- old_inode->i_sb->s_blocksize, &offset);
- } else {
- dir_bh = udf_bread(old_inode, 0, 0, &retval);
- if (!dir_bh)
- goto end_rename;
- dir_fi = udf_get_fileident(dir_bh->b_data,
- old_inode->i_sb->s_blocksize, &offset);
+ if (retval) {
+ inode_unlock(old_inode);
+ goto out_oiter;
}
- if (!dir_fi)
- goto end_rename;
- tloc = lelb_to_cpu(dir_fi->icb.extLocation);
+ has_diriter = true;
+ tloc = lelb_to_cpu(diriter.fi.icb.extLocation);
if (udf_get_lb_pblock(old_inode->i_sb, &tloc, 0) !=
- old_dir->i_ino)
- goto end_rename;
+ old_dir->i_ino) {
+ retval = -EFSCORRUPTED;
+ udf_err(old_inode->i_sb,
+ "directory (ino %lu) has parent entry pointing to another inode (%lu != %u)\n",
+ old_inode->i_ino, old_dir->i_ino,
+ udf_get_lb_pblock(old_inode->i_sb, &tloc, 0));
+ goto out_oiter;
+ }
+ }
+
+ retval = udf_fiiter_find_entry(new_dir, &new_dentry->d_name, &niter);
+ if (retval && retval != -ENOENT)
+ goto out_oiter;
+ /* Entry found but not passed by VFS? */
+ if (!retval && !new_inode) {
+ retval = -EFSCORRUPTED;
+ udf_fiiter_release(&niter);
+ goto out_oiter;
}
- if (!nfi) {
- nfi = udf_add_entry(new_dir, new_dentry, &nfibh, &ncfi,
- &retval);
- if (!nfi)
- goto end_rename;
+ /* Entry not found? Need to add one... */
+ if (retval) {
+ udf_fiiter_release(&niter);
+ retval = udf_fiiter_add_entry(new_dir, new_dentry, &niter);
+ if (retval)
+ goto out_oiter;
}
/*
@@ -1163,31 +850,46 @@ static int udf_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
/*
* ok, that's it
*/
- ncfi.fileVersionNum = ocfi.fileVersionNum;
- ncfi.fileCharacteristics = ocfi.fileCharacteristics;
- memcpy(&(ncfi.icb), &(ocfi.icb), sizeof(ocfi.icb));
- udf_write_fi(new_dir, &ncfi, nfi, &nfibh, NULL, NULL);
+ niter.fi.fileVersionNum = oiter.fi.fileVersionNum;
+ niter.fi.fileCharacteristics = oiter.fi.fileCharacteristics;
+ memcpy(&(niter.fi.icb), &(oiter.fi.icb), sizeof(oiter.fi.icb));
+ udf_fiiter_write_fi(&niter, NULL);
+ udf_fiiter_release(&niter);
- /* The old fid may have moved - find it again */
- ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi);
- udf_delete_entry(old_dir, ofi, &ofibh, &ocfi);
+ /*
+ * The old entry may have moved due to new entry allocation. Find it
+ * again.
+ */
+ udf_fiiter_release(&oiter);
+ retval = udf_fiiter_find_entry(old_dir, &old_dentry->d_name, &oiter);
+ if (retval) {
+ udf_err(old_dir->i_sb,
+ "failed to find renamed entry again in directory (ino %lu)\n",
+ old_dir->i_ino);
+ } else {
+ udf_fiiter_delete_entry(&oiter);
+ udf_fiiter_release(&oiter);
+ }
if (new_inode) {
new_inode->i_ctime = current_time(new_inode);
inode_dec_link_count(new_inode);
+ udf_add_fid_counter(old_dir->i_sb, S_ISDIR(new_inode->i_mode),
+ -1);
}
old_dir->i_ctime = old_dir->i_mtime = current_time(old_dir);
new_dir->i_ctime = new_dir->i_mtime = current_time(new_dir);
mark_inode_dirty(old_dir);
mark_inode_dirty(new_dir);
- if (dir_fi) {
- dir_fi->icb.extLocation = cpu_to_lelb(UDF_I(new_dir)->i_location);
- udf_update_tag((char *)dir_fi, udf_dir_entry_len(dir_fi));
- if (old_iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
- mark_inode_dirty(old_inode);
- else
- mark_buffer_dirty_inode(dir_bh, old_inode);
+ if (has_diriter) {
+ diriter.fi.icb.extLocation =
+ cpu_to_lelb(UDF_I(new_dir)->i_location);
+ udf_update_tag((char *)&diriter.fi,
+ udf_dir_entry_len(&diriter.fi));
+ udf_fiiter_write_fi(&diriter, NULL);
+ udf_fiiter_release(&diriter);
+ inode_unlock(old_inode);
inode_dec_link_count(old_dir);
if (new_inode)
@@ -1197,22 +899,13 @@ static int udf_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
mark_inode_dirty(new_dir);
}
}
-
- if (ofi) {
- if (ofibh.sbh != ofibh.ebh)
- brelse(ofibh.ebh);
- brelse(ofibh.sbh);
- }
-
- retval = 0;
-
-end_rename:
- brelse(dir_bh);
- if (nfi) {
- if (nfibh.sbh != nfibh.ebh)
- brelse(nfibh.ebh);
- brelse(nfibh.sbh);
+ return 0;
+out_oiter:
+ if (has_diriter) {
+ udf_fiiter_release(&diriter);
+ inode_unlock(old_inode);
}
+ udf_fiiter_release(&oiter);
return retval;
}
@@ -1221,17 +914,15 @@ static struct dentry *udf_get_parent(struct dentry *child)
{
struct kernel_lb_addr tloc;
struct inode *inode = NULL;
- struct fileIdentDesc cfi;
- struct udf_fileident_bh fibh;
-
- if (!udf_find_entry(d_inode(child), &dotdot_name, &fibh, &cfi))
- return ERR_PTR(-EACCES);
+ struct udf_fileident_iter iter;
+ int err;
- if (fibh.sbh != fibh.ebh)
- brelse(fibh.ebh);
- brelse(fibh.sbh);
+ err = udf_fiiter_find_entry(d_inode(child), &dotdot_name, &iter);
+ if (err)
+ return ERR_PTR(err);
- tloc = lelb_to_cpu(cfi.icb.extLocation);
+ tloc = lelb_to_cpu(iter.fi.icb.extLocation);
+ udf_fiiter_release(&iter);
inode = udf_iget(child->d_sb, &tloc);
if (IS_ERR(inode))
return ERR_CAST(inode);
diff --git a/fs/udf/partition.c b/fs/udf/partition.c
index 4cbf40575965..5bcfe78d5cab 100644
--- a/fs/udf/partition.c
+++ b/fs/udf/partition.c
@@ -54,6 +54,7 @@ uint32_t udf_get_pblock_virt15(struct super_block *sb, uint32_t block,
struct udf_part_map *map;
struct udf_virtual_data *vdata;
struct udf_inode_info *iinfo = UDF_I(sbi->s_vat_inode);
+ int err;
map = &sbi->s_partmaps[partition];
vdata = &map->s_type_specific.s_virtual;
@@ -79,12 +80,10 @@ uint32_t udf_get_pblock_virt15(struct super_block *sb, uint32_t block,
index = vdata->s_start_offset / sizeof(uint32_t) + block;
}
- loc = udf_block_map(sbi->s_vat_inode, newblock);
-
- bh = sb_bread(sb, loc);
+ bh = udf_bread(sbi->s_vat_inode, newblock, 0, &err);
if (!bh) {
- udf_debug("get_pblock(UDF_VIRTUAL_MAP:%p,%u,%u) VAT: %u[%u]\n",
- sb, block, partition, loc, index);
+ udf_debug("get_pblock(UDF_VIRTUAL_MAP:%p,%u,%u)\n",
+ sb, block, partition);
return 0xFFFFFFFF;
}
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 06eda8177b5f..6304e3c5c3d9 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -86,6 +86,13 @@ enum {
#define UDF_MAX_LVID_NESTING 1000
enum { UDF_MAX_LINKS = 0xffff };
+/*
+ * We limit filesize to 4TB. This is arbitrary as the on-disk format supports
+ * more but because the file space is described by a linked list of extents,
+ * each of which can have at most 1GB, the creation and handling of extents
+ * gets unusably slow beyond certain point...
+ */
+#define UDF_MAX_FILESIZE (1ULL << 42)
/* These are the "meat" - everything else is stuffing */
static int udf_fill_super(struct super_block *, void *, int);
@@ -147,6 +154,7 @@ static struct inode *udf_alloc_inode(struct super_block *sb)
ei->i_next_alloc_goal = 0;
ei->i_strat4096 = 0;
ei->i_streamdir = 0;
+ ei->i_hidden = 0;
init_rwsem(&ei->i_data_sem);
ei->cached_extent.lstart = -1;
spin_lock_init(&ei->i_extent_cache_lock);
@@ -733,7 +741,7 @@ static int udf_check_vsd(struct super_block *sb)
* added */
for (; !nsr && sector < VSD_MAX_SECTOR_OFFSET; sector += sectorsize) {
/* Read a block */
- bh = udf_tread(sb, sector >> sb->s_blocksize_bits);
+ bh = sb_bread(sb, sector >> sb->s_blocksize_bits);
if (!bh)
break;
@@ -1175,7 +1183,6 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index)
struct udf_part_map *map = &sbi->s_partmaps[p_index];
struct buffer_head *bh = NULL;
struct udf_inode_info *vati;
- uint32_t pos;
struct virtualAllocationTable20 *vat20;
sector_t blocks = sb_bdev_nr_blocks(sb);
@@ -1197,10 +1204,14 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index)
} else if (map->s_partition_type == UDF_VIRTUAL_MAP20) {
vati = UDF_I(sbi->s_vat_inode);
if (vati->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
- pos = udf_block_map(sbi->s_vat_inode, 0);
- bh = sb_bread(sb, pos);
- if (!bh)
- return -EIO;
+ int err = 0;
+
+ bh = udf_bread(sbi->s_vat_inode, 0, 0, &err);
+ if (!bh) {
+ if (!err)
+ err = -EFSCORRUPTED;
+ return err;
+ }
vat20 = (struct virtualAllocationTable20 *)bh->b_data;
} else {
vat20 = (struct virtualAllocationTable20 *)
@@ -1838,10 +1849,6 @@ static int udf_check_anchor_block(struct super_block *sb, sector_t block,
uint16_t ident;
int ret;
- if (UDF_QUERY_FLAG(sb, UDF_FLAG_VARCONV) &&
- udf_fixed_to_variable(block) >= sb_bdev_nr_blocks(sb))
- return -EAGAIN;
-
bh = udf_read_tagged(sb, block, block, &ident);
if (!bh)
return -EAGAIN;
@@ -1860,10 +1867,10 @@ static int udf_check_anchor_block(struct super_block *sb, sector_t block,
* Returns < 0 on error, 0 on success. -EAGAIN is special - try next set
* of anchors.
*/
-static int udf_scan_anchors(struct super_block *sb, sector_t *lastblock,
+static int udf_scan_anchors(struct super_block *sb, udf_pblk_t *lastblock,
struct kernel_lb_addr *fileset)
{
- sector_t last[6];
+ udf_pblk_t last[6];
int i;
struct udf_sb_info *sbi = UDF_SB(sb);
int last_count = 0;
@@ -1924,46 +1931,6 @@ static int udf_scan_anchors(struct super_block *sb, sector_t *lastblock,
}
/*
- * Find an anchor volume descriptor and load Volume Descriptor Sequence from
- * area specified by it. The function expects sbi->s_lastblock to be the last
- * block on the media.
- *
- * Return <0 on error, 0 if anchor found. -EAGAIN is special meaning anchor
- * was not found.
- */
-static int udf_find_anchor(struct super_block *sb,
- struct kernel_lb_addr *fileset)
-{
- struct udf_sb_info *sbi = UDF_SB(sb);
- sector_t lastblock = sbi->s_last_block;
- int ret;
-
- ret = udf_scan_anchors(sb, &lastblock, fileset);
- if (ret != -EAGAIN)
- goto out;
-
- /* No anchor found? Try VARCONV conversion of block numbers */
- UDF_SET_FLAG(sb, UDF_FLAG_VARCONV);
- lastblock = udf_variable_to_fixed(sbi->s_last_block);
- /* Firstly, we try to not convert number of the last block */
- ret = udf_scan_anchors(sb, &lastblock, fileset);
- if (ret != -EAGAIN)
- goto out;
-
- lastblock = sbi->s_last_block;
- /* Secondly, we try with converted number of the last block */
- ret = udf_scan_anchors(sb, &lastblock, fileset);
- if (ret < 0) {
- /* VARCONV didn't help. Clear it. */
- UDF_CLEAR_FLAG(sb, UDF_FLAG_VARCONV);
- }
-out:
- if (ret == 0)
- sbi->s_last_block = lastblock;
- return ret;
-}
-
-/*
* Check Volume Structure Descriptor, find Anchor block and load Volume
* Descriptor Sequence.
*
@@ -2003,7 +1970,7 @@ static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt,
/* Look for anchor block and load Volume Descriptor Sequence */
sbi->s_anchor = uopt->anchor;
- ret = udf_find_anchor(sb, fileset);
+ ret = udf_scan_anchors(sb, &sbi->s_last_block, fileset);
if (ret < 0) {
if (!silent && ret == -EAGAIN)
udf_warn(sb, "No anchor found\n");
@@ -2297,7 +2264,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
ret = -ENOMEM;
goto error_out;
}
- sb->s_maxbytes = MAX_LFS_FILESIZE;
+ sb->s_maxbytes = UDF_MAX_FILESIZE;
sb->s_max_links = UDF_MAX_LINKS;
return 0;
@@ -2454,7 +2421,7 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb,
if (bytes) {
brelse(bh);
newblock = udf_get_lb_pblock(sb, &loc, ++block);
- bh = udf_tread(sb, newblock);
+ bh = sb_bread(sb, newblock);
if (!bh) {
udf_debug("read failed\n");
goto out;
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c
index f3642f9c23f8..a34c8c4e6d21 100644
--- a/fs/udf/symlink.c
+++ b/fs/udf/symlink.c
@@ -107,53 +107,45 @@ static int udf_symlink_filler(struct file *file, struct folio *folio)
struct inode *inode = page->mapping->host;
struct buffer_head *bh = NULL;
unsigned char *symlink;
- int err;
+ int err = 0;
unsigned char *p = page_address(page);
- struct udf_inode_info *iinfo;
- uint32_t pos;
+ struct udf_inode_info *iinfo = UDF_I(inode);
/* We don't support symlinks longer than one block */
if (inode->i_size > inode->i_sb->s_blocksize) {
err = -ENAMETOOLONG;
- goto out_unmap;
+ goto out_unlock;
}
- iinfo = UDF_I(inode);
- pos = udf_block_map(inode, 0);
-
- down_read(&iinfo->i_data_sem);
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
symlink = iinfo->i_data + iinfo->i_lenEAttr;
} else {
- bh = sb_bread(inode->i_sb, pos);
-
+ bh = udf_bread(inode, 0, 0, &err);
if (!bh) {
- err = -EIO;
- goto out_unlock_inode;
+ if (!err)
+ err = -EFSCORRUPTED;
+ goto out_err;
}
-
symlink = bh->b_data;
}
err = udf_pc_to_char(inode->i_sb, symlink, inode->i_size, p, PAGE_SIZE);
brelse(bh);
if (err)
- goto out_unlock_inode;
+ goto out_err;
- up_read(&iinfo->i_data_sem);
SetPageUptodate(page);
unlock_page(page);
return 0;
-out_unlock_inode:
- up_read(&iinfo->i_data_sem);
+out_err:
SetPageError(page);
-out_unmap:
+out_unlock:
unlock_page(page);
return err;
}
-static int udf_symlink_getattr(struct user_namespace *mnt_userns,
+static int udf_symlink_getattr(struct mnt_idmap *idmap,
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags)
{
@@ -161,7 +153,7 @@ static int udf_symlink_getattr(struct user_namespace *mnt_userns,
struct inode *inode = d_backing_inode(dentry);
struct page *page;
- generic_fillattr(&init_user_ns, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, inode, stat);
page = read_mapping_page(inode->i_mapping, 0, NULL);
if (IS_ERR(page))
return PTR_ERR(page);
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
index 036ebd892b85..871856c69df5 100644
--- a/fs/udf/truncate.c
+++ b/fs/udf/truncate.c
@@ -125,7 +125,7 @@ void udf_discard_prealloc(struct inode *inode)
struct kernel_lb_addr eloc;
uint32_t elen;
uint64_t lbcount = 0;
- int8_t etype = -1, netype;
+ int8_t etype = -1;
struct udf_inode_info *iinfo = UDF_I(inode);
int bsize = 1 << inode->i_blkbits;
@@ -136,7 +136,7 @@ void udf_discard_prealloc(struct inode *inode)
epos.block = iinfo->i_location;
/* Find the last extent in the file */
- while ((netype = udf_next_aext(inode, &epos, &eloc, &elen, 0)) != -1) {
+ while (udf_next_aext(inode, &epos, &eloc, &elen, 0) != -1) {
brelse(prev_epos.bh);
prev_epos = epos;
if (prev_epos.bh)
@@ -240,7 +240,7 @@ int udf_truncate_extents(struct inode *inode)
brelse(epos.bh);
epos.offset = sizeof(struct allocExtDesc);
epos.block = eloc;
- epos.bh = udf_tread(sb,
+ epos.bh = sb_bread(sb,
udf_get_lb_pblock(sb, &eloc, 0));
/* Error reading indirect block? */
if (!epos.bh)
diff --git a/fs/udf/udf_i.h b/fs/udf/udf_i.h
index 06ff7006b822..312b7c9ef10e 100644
--- a/fs/udf/udf_i.h
+++ b/fs/udf/udf_i.h
@@ -44,7 +44,8 @@ struct udf_inode_info {
unsigned i_use : 1; /* unallocSpaceEntry */
unsigned i_strat4096 : 1;
unsigned i_streamdir : 1;
- unsigned reserved : 25;
+ unsigned i_hidden : 1; /* hidden system inode */
+ unsigned reserved : 24;
__u8 *i_data;
struct kernel_lb_addr i_locStreamdir;
__u64 i_lenStreams;
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index 291b56dd011e..9af6ff7f9747 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -23,7 +23,6 @@
#define UDF_FLAG_STRICT 5
#define UDF_FLAG_UNDELETE 6
#define UDF_FLAG_UNHIDE 7
-#define UDF_FLAG_VARCONV 8
#define UDF_FLAG_UID_FORGET 11 /* save -1 for uid to disk */
#define UDF_FLAG_GID_FORGET 12
#define UDF_FLAG_UID_SET 13
@@ -55,6 +54,8 @@
#define MF_DUPLICATE_MD 0x01
#define MF_MIRROR_FE_LOADED 0x02
+#define EFSCORRUPTED EUCLEAN
+
struct udf_meta_data {
__u32 s_meta_file_loc;
__u32 s_mirror_file_loc;
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 7e258f15b8ef..88692512a466 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -34,9 +34,6 @@ extern __printf(3, 4) void _udf_warn(struct super_block *sb,
#define udf_debug(fmt, ...) \
pr_debug("%s:%d:%s: " fmt, __FILE__, __LINE__, __func__, ##__VA_ARGS__)
-#define udf_fixed_to_variable(x) ( ( ( (x) >> 5 ) * 39 ) + ( (x) & 0x0000001F ) )
-#define udf_variable_to_fixed(x) ( ( ( (x) / 39 ) << 5 ) + ( (x) % 39 ) )
-
#define UDF_EXTENT_LENGTH_MASK 0x3FFFFFFF
#define UDF_EXTENT_FLAG_MASK 0xC0000000
@@ -83,14 +80,24 @@ extern const struct inode_operations udf_file_inode_operations;
extern const struct file_operations udf_file_operations;
extern const struct inode_operations udf_symlink_inode_operations;
extern const struct address_space_operations udf_aops;
-extern const struct address_space_operations udf_adinicb_aops;
extern const struct address_space_operations udf_symlink_aops;
-struct udf_fileident_bh {
- struct buffer_head *sbh;
- struct buffer_head *ebh;
- int soffset;
- int eoffset;
+struct udf_fileident_iter {
+ struct inode *dir; /* Directory we are working with */
+ loff_t pos; /* Logical position in a dir */
+ struct buffer_head *bh[2]; /* Buffer containing 'pos' and possibly
+ * next buffer if entry straddles
+ * blocks */
+ struct kernel_lb_addr eloc; /* Start of extent containing 'pos' */
+ uint32_t elen; /* Length of extent containing 'pos' */
+ sector_t loffset; /* Block offset of 'pos' within above
+ * extent */
+ struct extent_position epos; /* Position after the above extent */
+ struct fileIdentDesc fi; /* Copied directory entry */
+ uint8_t *name; /* Pointer to entry name */
+ uint8_t *namebuf; /* Storage for entry name in case
+ * the name is split between two blocks
+ */
};
struct udf_vds_record {
@@ -121,22 +128,16 @@ struct inode *udf_find_metadata_inode_efe(struct super_block *sb,
u32 meta_file_loc, u32 partition_num);
/* namei.c */
-extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *,
- struct fileIdentDesc *, struct udf_fileident_bh *,
- uint8_t *, uint8_t *);
static inline unsigned int udf_dir_entry_len(struct fileIdentDesc *cfi)
{
return ALIGN(sizeof(struct fileIdentDesc) +
le16_to_cpu(cfi->lengthOfImpUse) + cfi->lengthFileIdent,
UDF_NAME_PAD);
}
-static inline uint8_t *udf_get_fi_ident(struct fileIdentDesc *fi)
-{
- return ((uint8_t *)(fi + 1)) + le16_to_cpu(fi->lengthOfImpUse);
-}
/* file.c */
extern long udf_ioctl(struct file *, unsigned int, unsigned long);
+
/* inode.c */
extern struct inode *__udf_iget(struct super_block *, struct kernel_lb_addr *,
bool hidden_inode);
@@ -151,16 +152,14 @@ static inline struct inode *udf_iget(struct super_block *sb,
return __udf_iget(sb, ino, false);
}
extern int udf_expand_file_adinicb(struct inode *);
-extern struct buffer_head *udf_expand_dir_adinicb(struct inode *inode,
- udf_pblk_t *block, int *err);
extern struct buffer_head *udf_bread(struct inode *inode, udf_pblk_t block,
int create, int *err);
extern int udf_setsize(struct inode *, loff_t);
extern void udf_evict_inode(struct inode *);
extern int udf_write_inode(struct inode *, struct writeback_control *wbc);
-extern udf_pblk_t udf_block_map(struct inode *inode, sector_t block);
extern int8_t inode_bmap(struct inode *, sector_t, struct extent_position *,
struct kernel_lb_addr *, uint32_t *, sector_t *);
+int udf_get_block(struct inode *, sector_t, struct buffer_head *, int);
extern int udf_setup_indirect_aext(struct inode *inode, udf_pblk_t block,
struct extent_position *epos);
extern int __udf_add_aext(struct inode *inode, struct extent_position *epos,
@@ -177,9 +176,6 @@ extern int8_t udf_current_aext(struct inode *, struct extent_position *,
extern void udf_update_extra_perms(struct inode *inode, umode_t mode);
/* misc.c */
-extern struct buffer_head *udf_tgetblk(struct super_block *sb,
- udf_pblk_t block);
-extern struct buffer_head *udf_tread(struct super_block *sb, udf_pblk_t block);
extern struct genericFormat *udf_add_extendedattr(struct inode *, uint32_t,
uint32_t, uint8_t);
extern struct genericFormat *udf_get_extendedattr(struct inode *, uint32_t,
@@ -194,7 +190,7 @@ extern void udf_new_tag(char *, uint16_t, uint16_t, uint16_t, uint32_t, int);
/* lowlevel.c */
extern unsigned int udf_get_last_session(struct super_block *);
-extern unsigned long udf_get_last_block(struct super_block *);
+udf_pblk_t udf_get_last_block(struct super_block *);
/* partition.c */
extern uint32_t udf_get_pblock(struct super_block *, uint32_t, uint16_t,
@@ -243,14 +239,13 @@ extern udf_pblk_t udf_new_block(struct super_block *sb, struct inode *inode,
uint16_t partition, uint32_t goal, int *err);
/* directory.c */
-extern struct fileIdentDesc *udf_fileident_read(struct inode *, loff_t *,
- struct udf_fileident_bh *,
- struct fileIdentDesc *,
- struct extent_position *,
- struct kernel_lb_addr *, uint32_t *,
- sector_t *);
-extern struct fileIdentDesc *udf_get_fileident(void *buffer, int bufsize,
- int *offset);
+int udf_fiiter_init(struct udf_fileident_iter *iter, struct inode *dir,
+ loff_t pos);
+int udf_fiiter_advance(struct udf_fileident_iter *iter);
+void udf_fiiter_release(struct udf_fileident_iter *iter);
+void udf_fiiter_write_fi(struct udf_fileident_iter *iter, uint8_t *impuse);
+void udf_fiiter_update_elen(struct udf_fileident_iter *iter, uint32_t new_elen);
+int udf_fiiter_append_blk(struct udf_fileident_iter *iter);
extern struct long_ad *udf_get_filelongad(uint8_t *, int, uint32_t *, int);
extern struct short_ad *udf_get_fileshortad(uint8_t *, int, uint32_t *, int);
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 7e3e08c0166f..06bd84d555bd 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -289,7 +289,7 @@ cg_found:
ufs_mark_sb_dirty(sb);
inode->i_ino = cg * uspi->s_ipg + bit;
- inode_init_owner(&init_user_ns, inode, dir, mode);
+ inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
inode->i_blocks = 0;
inode->i_generation = 0;
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index a873de7dec1c..a4246c83a8cd 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -1212,14 +1212,14 @@ out:
return err;
}
-int ufs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int ufs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
unsigned int ia_valid = attr->ia_valid;
int error;
- error = setattr_prepare(&init_user_ns, dentry, attr);
+ error = setattr_prepare(&nop_mnt_idmap, dentry, attr);
if (error)
return error;
@@ -1229,7 +1229,7 @@ int ufs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
return error;
}
- setattr_copy(&init_user_ns, inode, attr);
+ setattr_copy(&nop_mnt_idmap, inode, attr);
mark_inode_dirty(inode);
return 0;
}
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 29d5a0e0c8f0..36154b5aca6d 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -69,7 +69,7 @@ static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, unsi
* If the create succeeds, we fill in the inode information
* with d_instantiate().
*/
-static int ufs_create (struct user_namespace * mnt_userns,
+static int ufs_create (struct mnt_idmap * idmap,
struct inode * dir, struct dentry * dentry, umode_t mode,
bool excl)
{
@@ -86,7 +86,7 @@ static int ufs_create (struct user_namespace * mnt_userns,
return ufs_add_nondir(dentry, inode);
}
-static int ufs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+static int ufs_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct inode *inode;
@@ -106,7 +106,7 @@ static int ufs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
return err;
}
-static int ufs_symlink (struct user_namespace * mnt_userns, struct inode * dir,
+static int ufs_symlink (struct mnt_idmap * idmap, struct inode * dir,
struct dentry * dentry, const char * symname)
{
struct super_block * sb = dir->i_sb;
@@ -166,7 +166,7 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
return error;
}
-static int ufs_mkdir(struct user_namespace * mnt_userns, struct inode * dir,
+static int ufs_mkdir(struct mnt_idmap * idmap, struct inode * dir,
struct dentry * dentry, umode_t mode)
{
struct inode * inode;
@@ -243,7 +243,7 @@ static int ufs_rmdir (struct inode * dir, struct dentry *dentry)
return err;
}
-static int ufs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+static int ufs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags)
{
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index 550f7c5a3636..6b499180643b 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -123,7 +123,7 @@ extern struct inode *ufs_iget(struct super_block *, unsigned long);
extern int ufs_write_inode (struct inode *, struct writeback_control *);
extern int ufs_sync_inode (struct inode *);
extern void ufs_evict_inode (struct inode *);
-extern int ufs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+extern int ufs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr);
/* namei.c */
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 98ac37e34e3d..cc694846617a 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -108,6 +108,21 @@ static bool userfaultfd_is_initialized(struct userfaultfd_ctx *ctx)
return ctx->features & UFFD_FEATURE_INITIALIZED;
}
+static void userfaultfd_set_vm_flags(struct vm_area_struct *vma,
+ vm_flags_t flags)
+{
+ const bool uffd_wp_changed = (vma->vm_flags ^ flags) & VM_UFFD_WP;
+
+ vma->vm_flags = flags;
+ /*
+ * For shared mappings, we want to enable writenotify while
+ * userfaultfd-wp is enabled (see vma_wants_writenotify()). We'll simply
+ * recalculate vma->vm_page_prot whenever userfaultfd-wp changes.
+ */
+ if ((vma->vm_flags & VM_SHARED) && uffd_wp_changed)
+ vma_set_page_prot(vma);
+}
+
static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode,
int wake_flags, void *key)
{
@@ -618,7 +633,8 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
for_each_vma(vmi, vma) {
if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
- vma->vm_flags &= ~__VM_UFFD_FLAGS;
+ userfaultfd_set_vm_flags(vma,
+ vma->vm_flags & ~__VM_UFFD_FLAGS);
}
}
mmap_write_unlock(mm);
@@ -652,7 +668,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
octx = vma->vm_userfaultfd_ctx.ctx;
if (!octx || !(octx->features & UFFD_FEATURE_EVENT_FORK)) {
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
- vma->vm_flags &= ~__VM_UFFD_FLAGS;
+ userfaultfd_set_vm_flags(vma, vma->vm_flags & ~__VM_UFFD_FLAGS);
return 0;
}
@@ -733,7 +749,7 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma,
} else {
/* Drop uffd context if remap feature not enabled */
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
- vma->vm_flags &= ~__VM_UFFD_FLAGS;
+ userfaultfd_set_vm_flags(vma, vma->vm_flags & ~__VM_UFFD_FLAGS);
}
}
@@ -895,7 +911,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
prev = vma;
}
- vma->vm_flags = new_flags;
+ userfaultfd_set_vm_flags(vma, new_flags);
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
}
mmap_write_unlock(mm);
@@ -1463,7 +1479,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
* the next vma was merged into the current one and
* the current one has not been updated yet.
*/
- vma->vm_flags = new_flags;
+ userfaultfd_set_vm_flags(vma, new_flags);
vma->vm_userfaultfd_ctx.ctx = ctx;
if (is_vm_hugetlb_page(vma) && uffd_disable_huge_pmd_share(vma))
@@ -1651,7 +1667,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
* the next vma was merged into the current one and
* the current one has not been updated yet.
*/
- vma->vm_flags = new_flags;
+ userfaultfd_set_vm_flags(vma, new_flags);
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
skip:
diff --git a/fs/utimes.c b/fs/utimes.c
index 39f356017635..3701b3946f88 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -7,6 +7,7 @@
#include <linux/uaccess.h>
#include <linux/compat.h>
#include <asm/unistd.h>
+#include <linux/filelock.h>
static bool nsec_valid(long nsec)
{
@@ -62,7 +63,7 @@ int vfs_utimes(const struct path *path, struct timespec64 *times)
}
retry_deleg:
inode_lock(inode);
- error = notify_change(mnt_user_ns(path->mnt), path->dentry, &newattrs,
+ error = notify_change(mnt_idmap(path->mnt), path->dentry, &newattrs,
&delegated_inode);
inode_unlock(inode);
if (delegated_inode) {
diff --git a/fs/vboxsf/dir.c b/fs/vboxsf/dir.c
index c4769a9396c5..075f15c43c78 100644
--- a/fs/vboxsf/dir.c
+++ b/fs/vboxsf/dir.c
@@ -294,14 +294,14 @@ out:
return err;
}
-static int vboxsf_dir_mkfile(struct user_namespace *mnt_userns,
+static int vboxsf_dir_mkfile(struct mnt_idmap *idmap,
struct inode *parent, struct dentry *dentry,
umode_t mode, bool excl)
{
return vboxsf_dir_create(parent, dentry, mode, false, excl, NULL);
}
-static int vboxsf_dir_mkdir(struct user_namespace *mnt_userns,
+static int vboxsf_dir_mkdir(struct mnt_idmap *idmap,
struct inode *parent, struct dentry *dentry,
umode_t mode)
{
@@ -387,7 +387,7 @@ static int vboxsf_dir_unlink(struct inode *parent, struct dentry *dentry)
return 0;
}
-static int vboxsf_dir_rename(struct user_namespace *mnt_userns,
+static int vboxsf_dir_rename(struct mnt_idmap *idmap,
struct inode *old_parent,
struct dentry *old_dentry,
struct inode *new_parent,
@@ -430,7 +430,7 @@ err_put_old_path:
return err;
}
-static int vboxsf_dir_symlink(struct user_namespace *mnt_userns,
+static int vboxsf_dir_symlink(struct mnt_idmap *idmap,
struct inode *parent, struct dentry *dentry,
const char *symname)
{
diff --git a/fs/vboxsf/utils.c b/fs/vboxsf/utils.c
index e1db0f3f7e5e..dd0ae1188e87 100644
--- a/fs/vboxsf/utils.c
+++ b/fs/vboxsf/utils.c
@@ -231,7 +231,7 @@ int vboxsf_inode_revalidate(struct dentry *dentry)
return 0;
}
-int vboxsf_getattr(struct user_namespace *mnt_userns, const struct path *path,
+int vboxsf_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *kstat, u32 request_mask, unsigned int flags)
{
int err;
@@ -252,11 +252,11 @@ int vboxsf_getattr(struct user_namespace *mnt_userns, const struct path *path,
if (err)
return err;
- generic_fillattr(&init_user_ns, d_inode(dentry), kstat);
+ generic_fillattr(&nop_mnt_idmap, d_inode(dentry), kstat);
return 0;
}
-int vboxsf_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int vboxsf_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *iattr)
{
struct vboxsf_inode *sf_i = VBOXSF_I(d_inode(dentry));
diff --git a/fs/vboxsf/vfsmod.h b/fs/vboxsf/vfsmod.h
index 9047befa66c5..05973eb89d52 100644
--- a/fs/vboxsf/vfsmod.h
+++ b/fs/vboxsf/vfsmod.h
@@ -97,10 +97,10 @@ int vboxsf_stat(struct vboxsf_sbi *sbi, struct shfl_string *path,
struct shfl_fsobjinfo *info);
int vboxsf_stat_dentry(struct dentry *dentry, struct shfl_fsobjinfo *info);
int vboxsf_inode_revalidate(struct dentry *dentry);
-int vboxsf_getattr(struct user_namespace *mnt_userns, const struct path *path,
+int vboxsf_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *kstat, u32 request_mask,
unsigned int query_flags);
-int vboxsf_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int vboxsf_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *iattr);
struct shfl_string *vboxsf_path_from_dentry(struct vboxsf_sbi *sbi,
struct dentry *dentry);
diff --git a/fs/verity/Kconfig b/fs/verity/Kconfig
index aad1f1d998b9..a7ffd718f171 100644
--- a/fs/verity/Kconfig
+++ b/fs/verity/Kconfig
@@ -34,14 +34,6 @@ config FS_VERITY
If unsure, say N.
-config FS_VERITY_DEBUG
- bool "FS Verity debugging"
- depends on FS_VERITY
- help
- Enable debugging messages related to fs-verity by default.
-
- Say N unless you are an fs-verity developer.
-
config FS_VERITY_BUILTIN_SIGNATURES
bool "FS Verity builtin signature support"
depends on FS_VERITY
diff --git a/fs/verity/enable.c b/fs/verity/enable.c
index df6b499bf6a1..e13db6507b38 100644
--- a/fs/verity/enable.c
+++ b/fs/verity/enable.c
@@ -7,136 +7,50 @@
#include "fsverity_private.h"
-#include <crypto/hash.h>
-#include <linux/backing-dev.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/sched/signal.h>
#include <linux/uaccess.h>
-/*
- * Read a file data page for Merkle tree construction. Do aggressive readahead,
- * since we're sequentially reading the entire file.
- */
-static struct page *read_file_data_page(struct file *file, pgoff_t index,
- struct file_ra_state *ra,
- unsigned long remaining_pages)
-{
- DEFINE_READAHEAD(ractl, file, ra, file->f_mapping, index);
- struct folio *folio;
-
- folio = __filemap_get_folio(ractl.mapping, index, FGP_ACCESSED, 0);
- if (!folio || !folio_test_uptodate(folio)) {
- if (folio)
- folio_put(folio);
- else
- page_cache_sync_ra(&ractl, remaining_pages);
- folio = read_cache_folio(ractl.mapping, index, NULL, file);
- if (IS_ERR(folio))
- return &folio->page;
- }
- if (folio_test_readahead(folio))
- page_cache_async_ra(&ractl, folio, remaining_pages);
- return folio_file_page(folio, index);
-}
+struct block_buffer {
+ u32 filled;
+ u8 *data;
+};
-static int build_merkle_tree_level(struct file *filp, unsigned int level,
- u64 num_blocks_to_hash,
- const struct merkle_tree_params *params,
- u8 *pending_hashes,
- struct ahash_request *req)
+/* Hash a block, writing the result to the next level's pending block buffer. */
+static int hash_one_block(struct inode *inode,
+ const struct merkle_tree_params *params,
+ struct ahash_request *req, struct block_buffer *cur)
{
- struct inode *inode = file_inode(filp);
- const struct fsverity_operations *vops = inode->i_sb->s_vop;
- struct file_ra_state ra = { 0 };
- unsigned int pending_size = 0;
- u64 dst_block_num;
- u64 i;
+ struct block_buffer *next = cur + 1;
int err;
- if (WARN_ON(params->block_size != PAGE_SIZE)) /* checked earlier too */
- return -EINVAL;
-
- if (level < params->num_levels) {
- dst_block_num = params->level_start[level];
- } else {
- if (WARN_ON(num_blocks_to_hash != 1))
- return -EINVAL;
- dst_block_num = 0; /* unused */
- }
+ /* Zero-pad the block if it's shorter than the block size. */
+ memset(&cur->data[cur->filled], 0, params->block_size - cur->filled);
- file_ra_state_init(&ra, filp->f_mapping);
-
- for (i = 0; i < num_blocks_to_hash; i++) {
- struct page *src_page;
-
- if ((pgoff_t)i % 10000 == 0 || i + 1 == num_blocks_to_hash)
- pr_debug("Hashing block %llu of %llu for level %u\n",
- i + 1, num_blocks_to_hash, level);
-
- if (level == 0) {
- /* Leaf: hashing a data block */
- src_page = read_file_data_page(filp, i, &ra,
- num_blocks_to_hash - i);
- if (IS_ERR(src_page)) {
- err = PTR_ERR(src_page);
- fsverity_err(inode,
- "Error %d reading data page %llu",
- err, i);
- return err;
- }
- } else {
- unsigned long num_ra_pages =
- min_t(unsigned long, num_blocks_to_hash - i,
- inode->i_sb->s_bdi->io_pages);
-
- /* Non-leaf: hashing hash block from level below */
- src_page = vops->read_merkle_tree_page(inode,
- params->level_start[level - 1] + i,
- num_ra_pages);
- if (IS_ERR(src_page)) {
- err = PTR_ERR(src_page);
- fsverity_err(inode,
- "Error %d reading Merkle tree page %llu",
- err, params->level_start[level - 1] + i);
- return err;
- }
- }
+ err = fsverity_hash_block(params, inode, req, virt_to_page(cur->data),
+ offset_in_page(cur->data),
+ &next->data[next->filled]);
+ if (err)
+ return err;
+ next->filled += params->digest_size;
+ cur->filled = 0;
+ return 0;
+}
- err = fsverity_hash_page(params, inode, req, src_page,
- &pending_hashes[pending_size]);
- put_page(src_page);
- if (err)
- return err;
- pending_size += params->digest_size;
-
- if (level == params->num_levels) /* Root hash? */
- return 0;
-
- if (pending_size + params->digest_size > params->block_size ||
- i + 1 == num_blocks_to_hash) {
- /* Flush the pending hash block */
- memset(&pending_hashes[pending_size], 0,
- params->block_size - pending_size);
- err = vops->write_merkle_tree_block(inode,
- pending_hashes,
- dst_block_num,
- params->log_blocksize);
- if (err) {
- fsverity_err(inode,
- "Error %d writing Merkle tree block %llu",
- err, dst_block_num);
- return err;
- }
- dst_block_num++;
- pending_size = 0;
- }
+static int write_merkle_tree_block(struct inode *inode, const u8 *buf,
+ unsigned long index,
+ const struct merkle_tree_params *params)
+{
+ u64 pos = (u64)index << params->log_blocksize;
+ int err;
- if (fatal_signal_pending(current))
- return -EINTR;
- cond_resched();
- }
- return 0;
+ err = inode->i_sb->s_vop->write_merkle_tree_block(inode, buf, pos,
+ params->block_size);
+ if (err)
+ fsverity_err(inode, "Error %d writing Merkle tree block %lu",
+ err, index);
+ return err;
}
/*
@@ -152,13 +66,17 @@ static int build_merkle_tree(struct file *filp,
u8 *root_hash)
{
struct inode *inode = file_inode(filp);
- u8 *pending_hashes;
+ const u64 data_size = inode->i_size;
+ const int num_levels = params->num_levels;
struct ahash_request *req;
- u64 blocks;
- unsigned int level;
- int err = -ENOMEM;
+ struct block_buffer _buffers[1 + FS_VERITY_MAX_LEVELS + 1] = {};
+ struct block_buffer *buffers = &_buffers[1];
+ unsigned long level_offset[FS_VERITY_MAX_LEVELS];
+ int level;
+ u64 offset;
+ int err;
- if (inode->i_size == 0) {
+ if (data_size == 0) {
/* Empty file is a special case; root hash is all 0's */
memset(root_hash, 0, params->digest_size);
return 0;
@@ -167,29 +85,95 @@ static int build_merkle_tree(struct file *filp,
/* This allocation never fails, since it's mempool-backed. */
req = fsverity_alloc_hash_request(params->hash_alg, GFP_KERNEL);
- pending_hashes = kmalloc(params->block_size, GFP_KERNEL);
- if (!pending_hashes)
- goto out;
-
/*
- * Build each level of the Merkle tree, starting at the leaf level
- * (level 0) and ascending to the root node (level 'num_levels - 1').
- * Then at the end (level 'num_levels'), calculate the root hash.
+ * Allocate the block buffers. Buffer "-1" is for data blocks.
+ * Buffers 0 <= level < num_levels are for the actual tree levels.
+ * Buffer 'num_levels' is for the root hash.
*/
- blocks = ((u64)inode->i_size + params->block_size - 1) >>
- params->log_blocksize;
- for (level = 0; level <= params->num_levels; level++) {
- err = build_merkle_tree_level(filp, level, blocks, params,
- pending_hashes, req);
+ for (level = -1; level < num_levels; level++) {
+ buffers[level].data = kzalloc(params->block_size, GFP_KERNEL);
+ if (!buffers[level].data) {
+ err = -ENOMEM;
+ goto out;
+ }
+ }
+ buffers[num_levels].data = root_hash;
+
+ BUILD_BUG_ON(sizeof(level_offset) != sizeof(params->level_start));
+ memcpy(level_offset, params->level_start, sizeof(level_offset));
+
+ /* Hash each data block, also hashing the tree blocks as they fill up */
+ for (offset = 0; offset < data_size; offset += params->block_size) {
+ ssize_t bytes_read;
+ loff_t pos = offset;
+
+ buffers[-1].filled = min_t(u64, params->block_size,
+ data_size - offset);
+ bytes_read = __kernel_read(filp, buffers[-1].data,
+ buffers[-1].filled, &pos);
+ if (bytes_read < 0) {
+ err = bytes_read;
+ fsverity_err(inode, "Error %d reading file data", err);
+ goto out;
+ }
+ if (bytes_read != buffers[-1].filled) {
+ err = -EINVAL;
+ fsverity_err(inode, "Short read of file data");
+ goto out;
+ }
+ err = hash_one_block(inode, params, req, &buffers[-1]);
if (err)
goto out;
- blocks = (blocks + params->hashes_per_block - 1) >>
- params->log_arity;
+ for (level = 0; level < num_levels; level++) {
+ if (buffers[level].filled + params->digest_size <=
+ params->block_size) {
+ /* Next block at @level isn't full yet */
+ break;
+ }
+ /* Next block at @level is full */
+
+ err = hash_one_block(inode, params, req,
+ &buffers[level]);
+ if (err)
+ goto out;
+ err = write_merkle_tree_block(inode,
+ buffers[level].data,
+ level_offset[level],
+ params);
+ if (err)
+ goto out;
+ level_offset[level]++;
+ }
+ if (fatal_signal_pending(current)) {
+ err = -EINTR;
+ goto out;
+ }
+ cond_resched();
+ }
+ /* Finish all nonempty pending tree blocks. */
+ for (level = 0; level < num_levels; level++) {
+ if (buffers[level].filled != 0) {
+ err = hash_one_block(inode, params, req,
+ &buffers[level]);
+ if (err)
+ goto out;
+ err = write_merkle_tree_block(inode,
+ buffers[level].data,
+ level_offset[level],
+ params);
+ if (err)
+ goto out;
+ }
+ }
+ /* The root hash was filled by the last call to hash_one_block(). */
+ if (WARN_ON(buffers[num_levels].filled != params->digest_size)) {
+ err = -EINVAL;
+ goto out;
}
- memcpy(root_hash, pending_hashes, params->digest_size);
err = 0;
out:
- kfree(pending_hashes);
+ for (level = -1; level < num_levels; level++)
+ kfree(buffers[level].data);
fsverity_free_hash_request(params->hash_alg, req);
return err;
}
@@ -263,15 +247,12 @@ static int enable_verity(struct file *filp,
* ->begin_enable_verity() and ->end_enable_verity() using the inode
* lock and only allow one process to be here at a time on a given file.
*/
- pr_debug("Building Merkle tree...\n");
BUILD_BUG_ON(sizeof(desc->root_hash) < FS_VERITY_MAX_DIGEST_SIZE);
err = build_merkle_tree(filp, &params, desc->root_hash);
if (err) {
fsverity_err(inode, "Error %d building Merkle tree", err);
goto rollback;
}
- pr_debug("Done building Merkle tree. Root hash is %s:%*phN\n",
- params.hash_alg->name, params.digest_size, desc->root_hash);
/*
* Create the fsverity_info. Don't bother trying to save work by
@@ -286,10 +267,6 @@ static int enable_verity(struct file *filp,
goto rollback;
}
- if (arg->sig_size)
- pr_debug("Storing a %u-byte PKCS#7 signature alongside the file\n",
- arg->sig_size);
-
/*
* Tell the filesystem to finish enabling verity on the file.
* Serialized with ->begin_enable_verity() by the inode lock.
@@ -352,7 +329,7 @@ int fsverity_ioctl_enable(struct file *filp, const void __user *uarg)
memchr_inv(arg.__reserved2, 0, sizeof(arg.__reserved2)))
return -EINVAL;
- if (arg.block_size != PAGE_SIZE)
+ if (!is_power_of_2(arg.block_size))
return -EINVAL;
if (arg.salt_size > sizeof_field(struct fsverity_descriptor, salt))
diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h
index c7fcb855e068..d34dcc033d72 100644
--- a/fs/verity/fsverity_private.h
+++ b/fs/verity/fsverity_private.h
@@ -8,10 +8,6 @@
#ifndef _FSVERITY_PRIVATE_H
#define _FSVERITY_PRIVATE_H
-#ifdef CONFIG_FS_VERITY_DEBUG
-#define DEBUG
-#endif
-
#define pr_fmt(fmt) "fs-verity: " fmt
#include <linux/fsverity.h>
@@ -46,17 +42,20 @@ struct merkle_tree_params {
unsigned int digest_size; /* same as hash_alg->digest_size */
unsigned int block_size; /* size of data and tree blocks */
unsigned int hashes_per_block; /* number of hashes per tree block */
- unsigned int log_blocksize; /* log2(block_size) */
- unsigned int log_arity; /* log2(hashes_per_block) */
+ unsigned int blocks_per_page; /* PAGE_SIZE / block_size */
+ u8 log_digestsize; /* log2(digest_size) */
+ u8 log_blocksize; /* log2(block_size) */
+ u8 log_arity; /* log2(hashes_per_block) */
+ u8 log_blocks_per_page; /* log2(blocks_per_page) */
unsigned int num_levels; /* number of levels in Merkle tree */
u64 tree_size; /* Merkle tree size in bytes */
- unsigned long level0_blocks; /* number of blocks in tree level 0 */
+ unsigned long tree_pages; /* Merkle tree size in pages */
/*
* Starting block index for each tree level, ordered from leaf level (0)
* to root level ('num_levels - 1')
*/
- u64 level_start[FS_VERITY_MAX_LEVELS];
+ unsigned long level_start[FS_VERITY_MAX_LEVELS];
};
/*
@@ -73,9 +72,10 @@ struct fsverity_info {
u8 root_hash[FS_VERITY_MAX_DIGEST_SIZE];
u8 file_digest[FS_VERITY_MAX_DIGEST_SIZE];
const struct inode *inode;
+ unsigned long *hash_block_verified;
+ spinlock_t hash_page_init_lock;
};
-
#define FS_VERITY_MAX_SIGNATURE_SIZE (FS_VERITY_MAX_DESCRIPTOR_SIZE - \
sizeof(struct fsverity_descriptor))
@@ -91,9 +91,9 @@ void fsverity_free_hash_request(struct fsverity_hash_alg *alg,
struct ahash_request *req);
const u8 *fsverity_prepare_hash_state(struct fsverity_hash_alg *alg,
const u8 *salt, size_t salt_size);
-int fsverity_hash_page(const struct merkle_tree_params *params,
- const struct inode *inode,
- struct ahash_request *req, struct page *page, u8 *out);
+int fsverity_hash_block(const struct merkle_tree_params *params,
+ const struct inode *inode, struct ahash_request *req,
+ struct page *page, unsigned int offset, u8 *out);
int fsverity_hash_buffer(struct fsverity_hash_alg *alg,
const void *data, size_t size, u8 *out);
void __init fsverity_check_hash_algs(void);
diff --git a/fs/verity/hash_algs.c b/fs/verity/hash_algs.c
index 6f8170cf4ae7..13fcf31be844 100644
--- a/fs/verity/hash_algs.c
+++ b/fs/verity/hash_algs.c
@@ -220,35 +220,33 @@ err_free:
}
/**
- * fsverity_hash_page() - hash a single data or hash page
+ * fsverity_hash_block() - hash a single data or hash block
* @params: the Merkle tree's parameters
* @inode: inode for which the hashing is being done
* @req: preallocated hash request
- * @page: the page to hash
+ * @page: the page containing the block to hash
+ * @offset: the offset of the block within @page
* @out: output digest, size 'params->digest_size' bytes
*
- * Hash a single data or hash block, assuming block_size == PAGE_SIZE.
- * The hash is salted if a salt is specified in the Merkle tree parameters.
+ * Hash a single data or hash block. The hash is salted if a salt is specified
+ * in the Merkle tree parameters.
*
* Return: 0 on success, -errno on failure
*/
-int fsverity_hash_page(const struct merkle_tree_params *params,
- const struct inode *inode,
- struct ahash_request *req, struct page *page, u8 *out)
+int fsverity_hash_block(const struct merkle_tree_params *params,
+ const struct inode *inode, struct ahash_request *req,
+ struct page *page, unsigned int offset, u8 *out)
{
struct scatterlist sg;
DECLARE_CRYPTO_WAIT(wait);
int err;
- if (WARN_ON(params->block_size != PAGE_SIZE))
- return -EINVAL;
-
sg_init_table(&sg, 1);
- sg_set_page(&sg, page, PAGE_SIZE, 0);
+ sg_set_page(&sg, page, params->block_size, offset);
ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
CRYPTO_TFM_REQ_MAY_BACKLOG,
crypto_req_done, &wait);
- ahash_request_set_crypt(req, &sg, out, PAGE_SIZE);
+ ahash_request_set_crypt(req, &sg, out, params->block_size);
if (params->hashstate) {
err = crypto_ahash_import(req, params->hashstate);
@@ -264,7 +262,7 @@ int fsverity_hash_page(const struct merkle_tree_params *params,
err = crypto_wait_req(err, &wait);
if (err)
- fsverity_err(inode, "Error %d computing page hash", err);
+ fsverity_err(inode, "Error %d computing block hash", err);
return err;
}
diff --git a/fs/verity/init.c b/fs/verity/init.c
index c98b7016f446..023905151035 100644
--- a/fs/verity/init.c
+++ b/fs/verity/init.c
@@ -49,7 +49,6 @@ static int __init fsverity_init(void)
if (err)
goto err_exit_workqueue;
- pr_debug("Initialized fs-verity\n");
return 0;
err_exit_workqueue:
diff --git a/fs/verity/open.c b/fs/verity/open.c
index 81ff94442f7b..9366b441d01c 100644
--- a/fs/verity/open.c
+++ b/fs/verity/open.c
@@ -7,6 +7,7 @@
#include "fsverity_private.h"
+#include <linux/mm.h>
#include <linux/slab.h>
static struct kmem_cache *fsverity_info_cachep;
@@ -34,6 +35,7 @@ int fsverity_init_merkle_tree_params(struct merkle_tree_params *params,
struct fsverity_hash_alg *hash_alg;
int err;
u64 blocks;
+ u64 blocks_in_level[FS_VERITY_MAX_LEVELS];
u64 offset;
int level;
@@ -54,7 +56,23 @@ int fsverity_init_merkle_tree_params(struct merkle_tree_params *params,
goto out_err;
}
- if (log_blocksize != PAGE_SHIFT) {
+ /*
+ * fs/verity/ directly assumes that the Merkle tree block size is a
+ * power of 2 less than or equal to PAGE_SIZE. Another restriction
+ * arises from the interaction between fs/verity/ and the filesystems
+ * themselves: filesystems expect to be able to verify a single
+ * filesystem block of data at a time. Therefore, the Merkle tree block
+ * size must also be less than or equal to the filesystem block size.
+ *
+ * The above are the only hard limitations, so in theory the Merkle tree
+ * block size could be as small as twice the digest size. However,
+ * that's not useful, and it would result in some unusually deep and
+ * large Merkle trees. So we currently require that the Merkle tree
+ * block size be at least 1024 bytes. That's small enough to test the
+ * sub-page block case on systems with 4K pages, but not too small.
+ */
+ if (log_blocksize < 10 || log_blocksize > PAGE_SHIFT ||
+ log_blocksize > inode->i_blkbits) {
fsverity_warn(inode, "Unsupported log_blocksize: %u",
log_blocksize);
err = -EINVAL;
@@ -62,6 +80,8 @@ int fsverity_init_merkle_tree_params(struct merkle_tree_params *params,
}
params->log_blocksize = log_blocksize;
params->block_size = 1 << log_blocksize;
+ params->log_blocks_per_page = PAGE_SHIFT - log_blocksize;
+ params->blocks_per_page = 1 << params->log_blocks_per_page;
if (WARN_ON(!is_power_of_2(params->digest_size))) {
err = -EINVAL;
@@ -74,13 +94,10 @@ int fsverity_init_merkle_tree_params(struct merkle_tree_params *params,
err = -EINVAL;
goto out_err;
}
- params->log_arity = params->log_blocksize - ilog2(params->digest_size);
+ params->log_digestsize = ilog2(params->digest_size);
+ params->log_arity = log_blocksize - params->log_digestsize;
params->hashes_per_block = 1 << params->log_arity;
- pr_debug("Merkle tree uses %s with %u-byte blocks (%u hashes/block), salt=%*phN\n",
- hash_alg->name, params->block_size, params->hashes_per_block,
- (int)salt_size, salt);
-
/*
* Compute the number of levels in the Merkle tree and create a map from
* level to the starting block of that level. Level 'num_levels - 1' is
@@ -90,31 +107,45 @@ int fsverity_init_merkle_tree_params(struct merkle_tree_params *params,
/* Compute number of levels and the number of blocks in each level */
blocks = ((u64)inode->i_size + params->block_size - 1) >> log_blocksize;
- pr_debug("Data is %lld bytes (%llu blocks)\n", inode->i_size, blocks);
while (blocks > 1) {
if (params->num_levels >= FS_VERITY_MAX_LEVELS) {
fsverity_err(inode, "Too many levels in Merkle tree");
- err = -EINVAL;
+ err = -EFBIG;
goto out_err;
}
blocks = (blocks + params->hashes_per_block - 1) >>
params->log_arity;
- /* temporarily using level_start[] to store blocks in level */
- params->level_start[params->num_levels++] = blocks;
+ blocks_in_level[params->num_levels++] = blocks;
}
- params->level0_blocks = params->level_start[0];
/* Compute the starting block of each level */
offset = 0;
for (level = (int)params->num_levels - 1; level >= 0; level--) {
- blocks = params->level_start[level];
params->level_start[level] = offset;
- pr_debug("Level %d is %llu blocks starting at index %llu\n",
- level, blocks, offset);
- offset += blocks;
+ offset += blocks_in_level[level];
+ }
+
+ /*
+ * With block_size != PAGE_SIZE, an in-memory bitmap will need to be
+ * allocated to track the "verified" status of hash blocks. Don't allow
+ * this bitmap to get too large. For now, limit it to 1 MiB, which
+ * limits the file size to about 4.4 TB with SHA-256 and 4K blocks.
+ *
+ * Together with the fact that the data, and thus also the Merkle tree,
+ * cannot have more than ULONG_MAX pages, this implies that hash block
+ * indices can always fit in an 'unsigned long'. But to be safe, we
+ * explicitly check for that too. Note, this is only for hash block
+ * indices; data block indices might not fit in an 'unsigned long'.
+ */
+ if ((params->block_size != PAGE_SIZE && offset > 1 << 23) ||
+ offset > ULONG_MAX) {
+ fsverity_err(inode, "Too many blocks in Merkle tree");
+ err = -EFBIG;
+ goto out_err;
}
params->tree_size = offset << log_blocksize;
+ params->tree_pages = PAGE_ALIGN(params->tree_size) >> PAGE_SHIFT;
return 0;
out_err:
@@ -165,7 +196,7 @@ struct fsverity_info *fsverity_create_info(const struct inode *inode,
fsverity_err(inode,
"Error %d initializing Merkle tree parameters",
err);
- goto out;
+ goto fail;
}
memcpy(vi->root_hash, desc->root_hash, vi->tree_params.digest_size);
@@ -174,20 +205,48 @@ struct fsverity_info *fsverity_create_info(const struct inode *inode,
vi->file_digest);
if (err) {
fsverity_err(inode, "Error %d computing file digest", err);
- goto out;
+ goto fail;
}
- pr_debug("Computed file digest: %s:%*phN\n",
- vi->tree_params.hash_alg->name,
- vi->tree_params.digest_size, vi->file_digest);
err = fsverity_verify_signature(vi, desc->signature,
le32_to_cpu(desc->sig_size));
-out:
- if (err) {
- fsverity_free_info(vi);
- vi = ERR_PTR(err);
+ if (err)
+ goto fail;
+
+ if (vi->tree_params.block_size != PAGE_SIZE) {
+ /*
+ * When the Merkle tree block size and page size differ, we use
+ * a bitmap to keep track of which hash blocks have been
+ * verified. This bitmap must contain one bit per hash block,
+ * including alignment to a page boundary at the end.
+ *
+ * Eventually, to support extremely large files in an efficient
+ * way, it might be necessary to make pages of this bitmap
+ * reclaimable. But for now, simply allocating the whole bitmap
+ * is a simple solution that works well on the files on which
+ * fsverity is realistically used. E.g., with SHA-256 and 4K
+ * blocks, a 100MB file only needs a 24-byte bitmap, and the
+ * bitmap for any file under 17GB fits in a 4K page.
+ */
+ unsigned long num_bits =
+ vi->tree_params.tree_pages <<
+ vi->tree_params.log_blocks_per_page;
+
+ vi->hash_block_verified = kvcalloc(BITS_TO_LONGS(num_bits),
+ sizeof(unsigned long),
+ GFP_KERNEL);
+ if (!vi->hash_block_verified) {
+ err = -ENOMEM;
+ goto fail;
+ }
+ spin_lock_init(&vi->hash_page_init_lock);
}
+
return vi;
+
+fail:
+ fsverity_free_info(vi);
+ return ERR_PTR(err);
}
void fsverity_set_info(struct inode *inode, struct fsverity_info *vi)
@@ -214,6 +273,7 @@ void fsverity_free_info(struct fsverity_info *vi)
if (!vi)
return;
kfree(vi->tree_params.hashstate);
+ kvfree(vi->hash_block_verified);
kmem_cache_free(fsverity_info_cachep, vi);
}
@@ -325,67 +385,28 @@ out_free_desc:
return err;
}
-/**
- * fsverity_file_open() - prepare to open a verity file
- * @inode: the inode being opened
- * @filp: the struct file being set up
- *
- * When opening a verity file, deny the open if it is for writing. Otherwise,
- * set up the inode's ->i_verity_info if not already done.
- *
- * When combined with fscrypt, this must be called after fscrypt_file_open().
- * Otherwise, we won't have the key set up to decrypt the verity metadata.
- *
- * Return: 0 on success, -errno on failure
- */
-int fsverity_file_open(struct inode *inode, struct file *filp)
+int __fsverity_file_open(struct inode *inode, struct file *filp)
{
- if (!IS_VERITY(inode))
- return 0;
-
- if (filp->f_mode & FMODE_WRITE) {
- pr_debug("Denying opening verity file (ino %lu) for write\n",
- inode->i_ino);
+ if (filp->f_mode & FMODE_WRITE)
return -EPERM;
- }
-
return ensure_verity_info(inode);
}
-EXPORT_SYMBOL_GPL(fsverity_file_open);
+EXPORT_SYMBOL_GPL(__fsverity_file_open);
-/**
- * fsverity_prepare_setattr() - prepare to change a verity inode's attributes
- * @dentry: dentry through which the inode is being changed
- * @attr: attributes to change
- *
- * Verity files are immutable, so deny truncates. This isn't covered by the
- * open-time check because sys_truncate() takes a path, not a file descriptor.
- *
- * Return: 0 on success, -errno on failure
- */
-int fsverity_prepare_setattr(struct dentry *dentry, struct iattr *attr)
+int __fsverity_prepare_setattr(struct dentry *dentry, struct iattr *attr)
{
- if (IS_VERITY(d_inode(dentry)) && (attr->ia_valid & ATTR_SIZE)) {
- pr_debug("Denying truncate of verity file (ino %lu)\n",
- d_inode(dentry)->i_ino);
+ if (attr->ia_valid & ATTR_SIZE)
return -EPERM;
- }
return 0;
}
-EXPORT_SYMBOL_GPL(fsverity_prepare_setattr);
+EXPORT_SYMBOL_GPL(__fsverity_prepare_setattr);
-/**
- * fsverity_cleanup_inode() - free the inode's verity info, if present
- * @inode: an inode being evicted
- *
- * Filesystems must call this on inode eviction to free ->i_verity_info.
- */
-void fsverity_cleanup_inode(struct inode *inode)
+void __fsverity_cleanup_inode(struct inode *inode)
{
fsverity_free_info(inode->i_verity_info);
inode->i_verity_info = NULL;
}
-EXPORT_SYMBOL_GPL(fsverity_cleanup_inode);
+EXPORT_SYMBOL_GPL(__fsverity_cleanup_inode);
int __init fsverity_init_info_cache(void)
{
diff --git a/fs/verity/signature.c b/fs/verity/signature.c
index 143a530a8008..e7d3ca919a1e 100644
--- a/fs/verity/signature.c
+++ b/fs/verity/signature.c
@@ -82,8 +82,6 @@ int fsverity_verify_signature(const struct fsverity_info *vi,
return err;
}
- pr_debug("Valid signature for file digest %s:%*phN\n",
- hash_alg->name, hash_alg->digest_size, vi->file_digest);
return 0;
}
diff --git a/fs/verity/verify.c b/fs/verity/verify.c
index 961ba248021f..f50e3b5b52c9 100644
--- a/fs/verity/verify.c
+++ b/fs/verity/verify.c
@@ -9,39 +9,12 @@
#include <crypto/hash.h>
#include <linux/bio.h>
-#include <linux/ratelimit.h>
static struct workqueue_struct *fsverity_read_workqueue;
-/**
- * hash_at_level() - compute the location of the block's hash at the given level
- *
- * @params: (in) the Merkle tree parameters
- * @dindex: (in) the index of the data block being verified
- * @level: (in) the level of hash we want (0 is leaf level)
- * @hindex: (out) the index of the hash block containing the wanted hash
- * @hoffset: (out) the byte offset to the wanted hash within the hash block
- */
-static void hash_at_level(const struct merkle_tree_params *params,
- pgoff_t dindex, unsigned int level, pgoff_t *hindex,
- unsigned int *hoffset)
-{
- pgoff_t position;
-
- /* Offset of the hash within the level's region, in hashes */
- position = dindex >> (level * params->log_arity);
-
- /* Index of the hash block in the tree overall */
- *hindex = params->level_start[level] + (position >> params->log_arity);
-
- /* Offset of the wanted hash (in bytes) within the hash block */
- *hoffset = (position & ((1 << params->log_arity) - 1)) <<
- (params->log_blocksize - params->log_arity);
-}
-
static inline int cmp_hashes(const struct fsverity_info *vi,
const u8 *want_hash, const u8 *real_hash,
- pgoff_t index, int level)
+ u64 data_pos, int level)
{
const unsigned int hsize = vi->tree_params.digest_size;
@@ -49,159 +22,312 @@ static inline int cmp_hashes(const struct fsverity_info *vi,
return 0;
fsverity_err(vi->inode,
- "FILE CORRUPTED! index=%lu, level=%d, want_hash=%s:%*phN, real_hash=%s:%*phN",
- index, level,
+ "FILE CORRUPTED! pos=%llu, level=%d, want_hash=%s:%*phN, real_hash=%s:%*phN",
+ data_pos, level,
vi->tree_params.hash_alg->name, hsize, want_hash,
vi->tree_params.hash_alg->name, hsize, real_hash);
return -EBADMSG;
}
+static bool data_is_zeroed(struct inode *inode, struct page *page,
+ unsigned int len, unsigned int offset)
+{
+ void *virt = kmap_local_page(page);
+
+ if (memchr_inv(virt + offset, 0, len)) {
+ kunmap_local(virt);
+ fsverity_err(inode,
+ "FILE CORRUPTED! Data past EOF is not zeroed");
+ return false;
+ }
+ kunmap_local(virt);
+ return true;
+}
+
+/*
+ * Returns true if the hash block with index @hblock_idx in the tree, located in
+ * @hpage, has already been verified.
+ */
+static bool is_hash_block_verified(struct fsverity_info *vi, struct page *hpage,
+ unsigned long hblock_idx)
+{
+ bool verified;
+ unsigned int blocks_per_page;
+ unsigned int i;
+
+ /*
+ * When the Merkle tree block size and page size are the same, then the
+ * ->hash_block_verified bitmap isn't allocated, and we use PG_checked
+ * to directly indicate whether the page's block has been verified.
+ *
+ * Using PG_checked also guarantees that we re-verify hash pages that
+ * get evicted and re-instantiated from the backing storage, as new
+ * pages always start out with PG_checked cleared.
+ */
+ if (!vi->hash_block_verified)
+ return PageChecked(hpage);
+
+ /*
+ * When the Merkle tree block size and page size differ, we use a bitmap
+ * to indicate whether each hash block has been verified.
+ *
+ * However, we still need to ensure that hash pages that get evicted and
+ * re-instantiated from the backing storage are re-verified. To do
+ * this, we use PG_checked again, but now it doesn't really mean
+ * "checked". Instead, now it just serves as an indicator for whether
+ * the hash page is newly instantiated or not.
+ *
+ * The first thread that sees PG_checked=0 must clear the corresponding
+ * bitmap bits, then set PG_checked=1. This requires a spinlock. To
+ * avoid having to take this spinlock in the common case of
+ * PG_checked=1, we start with an opportunistic lockless read.
+ */
+ if (PageChecked(hpage)) {
+ /*
+ * A read memory barrier is needed here to give ACQUIRE
+ * semantics to the above PageChecked() test.
+ */
+ smp_rmb();
+ return test_bit(hblock_idx, vi->hash_block_verified);
+ }
+ spin_lock(&vi->hash_page_init_lock);
+ if (PageChecked(hpage)) {
+ verified = test_bit(hblock_idx, vi->hash_block_verified);
+ } else {
+ blocks_per_page = vi->tree_params.blocks_per_page;
+ hblock_idx = round_down(hblock_idx, blocks_per_page);
+ for (i = 0; i < blocks_per_page; i++)
+ clear_bit(hblock_idx + i, vi->hash_block_verified);
+ /*
+ * A write memory barrier is needed here to give RELEASE
+ * semantics to the below SetPageChecked() operation.
+ */
+ smp_wmb();
+ SetPageChecked(hpage);
+ verified = false;
+ }
+ spin_unlock(&vi->hash_page_init_lock);
+ return verified;
+}
+
/*
- * Verify a single data page against the file's Merkle tree.
+ * Verify a single data block against the file's Merkle tree.
*
* In principle, we need to verify the entire path to the root node. However,
- * for efficiency the filesystem may cache the hash pages. Therefore we need
- * only ascend the tree until an already-verified page is seen, as indicated by
- * the PageChecked bit being set; then verify the path to that page.
- *
- * This code currently only supports the case where the verity block size is
- * equal to PAGE_SIZE. Doing otherwise would be possible but tricky, since we
- * wouldn't be able to use the PageChecked bit.
- *
- * Note that multiple processes may race to verify a hash page and mark it
- * Checked, but it doesn't matter; the result will be the same either way.
+ * for efficiency the filesystem may cache the hash blocks. Therefore we need
+ * only ascend the tree until an already-verified hash block is seen, and then
+ * verify the path to that block.
*
- * Return: true if the page is valid, else false.
+ * Return: %true if the data block is valid, else %false.
*/
-static bool verify_page(struct inode *inode, const struct fsverity_info *vi,
- struct ahash_request *req, struct page *data_page,
- unsigned long level0_ra_pages)
+static bool
+verify_data_block(struct inode *inode, struct fsverity_info *vi,
+ struct ahash_request *req, struct page *data_page,
+ u64 data_pos, unsigned int dblock_offset_in_page,
+ unsigned long max_ra_pages)
{
const struct merkle_tree_params *params = &vi->tree_params;
const unsigned int hsize = params->digest_size;
- const pgoff_t index = data_page->index;
int level;
u8 _want_hash[FS_VERITY_MAX_DIGEST_SIZE];
const u8 *want_hash;
u8 real_hash[FS_VERITY_MAX_DIGEST_SIZE];
- struct page *hpages[FS_VERITY_MAX_LEVELS];
- unsigned int hoffsets[FS_VERITY_MAX_LEVELS];
+ /* The hash blocks that are traversed, indexed by level */
+ struct {
+ /* Page containing the hash block */
+ struct page *page;
+ /* Index of the hash block in the tree overall */
+ unsigned long index;
+ /* Byte offset of the hash block within @page */
+ unsigned int offset_in_page;
+ /* Byte offset of the wanted hash within @page */
+ unsigned int hoffset;
+ } hblocks[FS_VERITY_MAX_LEVELS];
+ /*
+ * The index of the previous level's block within that level; also the
+ * index of that block's hash within the current level.
+ */
+ u64 hidx = data_pos >> params->log_blocksize;
int err;
- if (WARN_ON_ONCE(!PageLocked(data_page) || PageUptodate(data_page)))
- return false;
-
- pr_debug_ratelimited("Verifying data page %lu...\n", index);
+ if (unlikely(data_pos >= inode->i_size)) {
+ /*
+ * This can happen in the data page spanning EOF when the Merkle
+ * tree block size is less than the page size. The Merkle tree
+ * doesn't cover data blocks fully past EOF. But the entire
+ * page spanning EOF can be visible to userspace via a mmap, and
+ * any part past EOF should be all zeroes. Therefore, we need
+ * to verify that any data blocks fully past EOF are all zeroes.
+ */
+ return data_is_zeroed(inode, data_page, params->block_size,
+ dblock_offset_in_page);
+ }
/*
- * Starting at the leaf level, ascend the tree saving hash pages along
- * the way until we find a verified hash page, indicated by PageChecked;
- * or until we reach the root.
+ * Starting at the leaf level, ascend the tree saving hash blocks along
+ * the way until we find a hash block that has already been verified, or
+ * until we reach the root.
*/
for (level = 0; level < params->num_levels; level++) {
- pgoff_t hindex;
+ unsigned long next_hidx;
+ unsigned long hblock_idx;
+ pgoff_t hpage_idx;
+ unsigned int hblock_offset_in_page;
unsigned int hoffset;
struct page *hpage;
- hash_at_level(params, index, level, &hindex, &hoffset);
+ /*
+ * The index of the block in the current level; also the index
+ * of that block's hash within the next level.
+ */
+ next_hidx = hidx >> params->log_arity;
+
+ /* Index of the hash block in the tree overall */
+ hblock_idx = params->level_start[level] + next_hidx;
+
+ /* Index of the hash page in the tree overall */
+ hpage_idx = hblock_idx >> params->log_blocks_per_page;
- pr_debug_ratelimited("Level %d: hindex=%lu, hoffset=%u\n",
- level, hindex, hoffset);
+ /* Byte offset of the hash block within the page */
+ hblock_offset_in_page =
+ (hblock_idx << params->log_blocksize) & ~PAGE_MASK;
- hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode, hindex,
- level == 0 ? level0_ra_pages : 0);
+ /* Byte offset of the hash within the page */
+ hoffset = hblock_offset_in_page +
+ ((hidx << params->log_digestsize) &
+ (params->block_size - 1));
+
+ hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode,
+ hpage_idx, level == 0 ? min(max_ra_pages,
+ params->tree_pages - hpage_idx) : 0);
if (IS_ERR(hpage)) {
err = PTR_ERR(hpage);
fsverity_err(inode,
"Error %d reading Merkle tree page %lu",
- err, hindex);
+ err, hpage_idx);
goto out;
}
-
- if (PageChecked(hpage)) {
+ if (is_hash_block_verified(vi, hpage, hblock_idx)) {
memcpy_from_page(_want_hash, hpage, hoffset, hsize);
want_hash = _want_hash;
put_page(hpage);
- pr_debug_ratelimited("Hash page already checked, want %s:%*phN\n",
- params->hash_alg->name,
- hsize, want_hash);
goto descend;
}
- pr_debug_ratelimited("Hash page not yet checked\n");
- hpages[level] = hpage;
- hoffsets[level] = hoffset;
+ hblocks[level].page = hpage;
+ hblocks[level].index = hblock_idx;
+ hblocks[level].offset_in_page = hblock_offset_in_page;
+ hblocks[level].hoffset = hoffset;
+ hidx = next_hidx;
}
want_hash = vi->root_hash;
- pr_debug("Want root hash: %s:%*phN\n",
- params->hash_alg->name, hsize, want_hash);
descend:
- /* Descend the tree verifying hash pages */
+ /* Descend the tree verifying hash blocks. */
for (; level > 0; level--) {
- struct page *hpage = hpages[level - 1];
- unsigned int hoffset = hoffsets[level - 1];
-
- err = fsverity_hash_page(params, inode, req, hpage, real_hash);
+ struct page *hpage = hblocks[level - 1].page;
+ unsigned long hblock_idx = hblocks[level - 1].index;
+ unsigned int hblock_offset_in_page =
+ hblocks[level - 1].offset_in_page;
+ unsigned int hoffset = hblocks[level - 1].hoffset;
+
+ err = fsverity_hash_block(params, inode, req, hpage,
+ hblock_offset_in_page, real_hash);
if (err)
goto out;
- err = cmp_hashes(vi, want_hash, real_hash, index, level - 1);
+ err = cmp_hashes(vi, want_hash, real_hash, data_pos, level - 1);
if (err)
goto out;
- SetPageChecked(hpage);
+ /*
+ * Mark the hash block as verified. This must be atomic and
+ * idempotent, as the same hash block might be verified by
+ * multiple threads concurrently.
+ */
+ if (vi->hash_block_verified)
+ set_bit(hblock_idx, vi->hash_block_verified);
+ else
+ SetPageChecked(hpage);
memcpy_from_page(_want_hash, hpage, hoffset, hsize);
want_hash = _want_hash;
put_page(hpage);
- pr_debug("Verified hash page at level %d, now want %s:%*phN\n",
- level - 1, params->hash_alg->name, hsize, want_hash);
}
- /* Finally, verify the data page */
- err = fsverity_hash_page(params, inode, req, data_page, real_hash);
+ /* Finally, verify the data block. */
+ err = fsverity_hash_block(params, inode, req, data_page,
+ dblock_offset_in_page, real_hash);
if (err)
goto out;
- err = cmp_hashes(vi, want_hash, real_hash, index, -1);
+ err = cmp_hashes(vi, want_hash, real_hash, data_pos, -1);
out:
for (; level > 0; level--)
- put_page(hpages[level - 1]);
+ put_page(hblocks[level - 1].page);
return err == 0;
}
+static bool
+verify_data_blocks(struct inode *inode, struct fsverity_info *vi,
+ struct ahash_request *req, struct folio *data_folio,
+ size_t len, size_t offset, unsigned long max_ra_pages)
+{
+ const unsigned int block_size = vi->tree_params.block_size;
+ u64 pos = (u64)data_folio->index << PAGE_SHIFT;
+
+ if (WARN_ON_ONCE(len <= 0 || !IS_ALIGNED(len | offset, block_size)))
+ return false;
+ if (WARN_ON_ONCE(!folio_test_locked(data_folio) ||
+ folio_test_uptodate(data_folio)))
+ return false;
+ do {
+ struct page *data_page =
+ folio_page(data_folio, offset >> PAGE_SHIFT);
+
+ if (!verify_data_block(inode, vi, req, data_page, pos + offset,
+ offset & ~PAGE_MASK, max_ra_pages))
+ return false;
+ offset += block_size;
+ len -= block_size;
+ } while (len);
+ return true;
+}
+
/**
- * fsverity_verify_page() - verify a data page
- * @page: the page to verity
+ * fsverity_verify_blocks() - verify data in a folio
+ * @folio: the folio containing the data to verify
+ * @len: the length of the data to verify in the folio
+ * @offset: the offset of the data to verify in the folio
*
- * Verify a page that has just been read from a verity file. The page must be a
- * pagecache page that is still locked and not yet uptodate.
+ * Verify data that has just been read from a verity file. The data must be
+ * located in a pagecache folio that is still locked and not yet uptodate. The
+ * length and offset of the data must be Merkle tree block size aligned.
*
- * Return: true if the page is valid, else false.
+ * Return: %true if the data is valid, else %false.
*/
-bool fsverity_verify_page(struct page *page)
+bool fsverity_verify_blocks(struct folio *folio, size_t len, size_t offset)
{
- struct inode *inode = page->mapping->host;
- const struct fsverity_info *vi = inode->i_verity_info;
+ struct inode *inode = folio->mapping->host;
+ struct fsverity_info *vi = inode->i_verity_info;
struct ahash_request *req;
bool valid;
/* This allocation never fails, since it's mempool-backed. */
req = fsverity_alloc_hash_request(vi->tree_params.hash_alg, GFP_NOFS);
- valid = verify_page(inode, vi, req, page, 0);
+ valid = verify_data_blocks(inode, vi, req, folio, len, offset, 0);
fsverity_free_hash_request(vi->tree_params.hash_alg, req);
return valid;
}
-EXPORT_SYMBOL_GPL(fsverity_verify_page);
+EXPORT_SYMBOL_GPL(fsverity_verify_blocks);
#ifdef CONFIG_BLOCK
/**
* fsverity_verify_bio() - verify a 'read' bio that has just completed
* @bio: the bio to verify
*
- * Verify a set of pages that have just been read from a verity file. The pages
- * must be pagecache pages that are still locked and not yet uptodate. If a
- * page fails verification, then bio->bi_status is set to an error status.
+ * Verify the bio's data against the file's Merkle tree. All bio data segments
+ * must be aligned to the file's Merkle tree block size. If any data fails
+ * verification, then bio->bi_status is set to an error status.
*
* This is a helper function for use by the ->readahead() method of filesystems
* that issue bios to read data directly into the page cache. Filesystems that
@@ -212,15 +338,13 @@ EXPORT_SYMBOL_GPL(fsverity_verify_page);
void fsverity_verify_bio(struct bio *bio)
{
struct inode *inode = bio_first_page_all(bio)->mapping->host;
- const struct fsverity_info *vi = inode->i_verity_info;
- const struct merkle_tree_params *params = &vi->tree_params;
+ struct fsverity_info *vi = inode->i_verity_info;
struct ahash_request *req;
- struct bio_vec *bv;
- struct bvec_iter_all iter_all;
+ struct folio_iter fi;
unsigned long max_ra_pages = 0;
/* This allocation never fails, since it's mempool-backed. */
- req = fsverity_alloc_hash_request(params->hash_alg, GFP_NOFS);
+ req = fsverity_alloc_hash_request(vi->tree_params.hash_alg, GFP_NOFS);
if (bio->bi_opf & REQ_RAHEAD) {
/*
@@ -232,24 +356,18 @@ void fsverity_verify_bio(struct bio *bio)
* This improves sequential read performance, as it greatly
* reduces the number of I/O requests made to the Merkle tree.
*/
- bio_for_each_segment_all(bv, bio, iter_all)
- max_ra_pages++;
- max_ra_pages /= 4;
+ max_ra_pages = bio->bi_iter.bi_size >> (PAGE_SHIFT + 2);
}
- bio_for_each_segment_all(bv, bio, iter_all) {
- struct page *page = bv->bv_page;
- unsigned long level0_index = page->index >> params->log_arity;
- unsigned long level0_ra_pages =
- min(max_ra_pages, params->level0_blocks - level0_index);
-
- if (!verify_page(inode, vi, req, page, level0_ra_pages)) {
+ bio_for_each_folio_all(fi, bio) {
+ if (!verify_data_blocks(inode, vi, req, fi.folio, fi.length,
+ fi.offset, max_ra_pages)) {
bio->bi_status = BLK_STS_IOERR;
break;
}
}
- fsverity_free_hash_request(params->hash_alg, req);
+ fsverity_free_hash_request(vi->tree_params.hash_alg, req);
}
EXPORT_SYMBOL_GPL(fsverity_verify_bio);
#endif /* CONFIG_BLOCK */
diff --git a/fs/xattr.c b/fs/xattr.c
index adab9a70b536..14a7eb3c8fa8 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -9,6 +9,7 @@
Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
*/
#include <linux/fs.h>
+#include <linux/filelock.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/xattr.h>
@@ -82,7 +83,7 @@ xattr_resolve_name(struct inode *inode, const char **name)
/**
* may_write_xattr - check whether inode allows writing xattr
- * @mnt_userns: User namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @inode: the inode on which to set an xattr
*
* Check whether the inode allows writing xattrs. Specifically, we can never
@@ -94,13 +95,13 @@ xattr_resolve_name(struct inode *inode, const char **name)
*
* Return: On success zero is returned. On error a negative errno is returned.
*/
-int may_write_xattr(struct user_namespace *mnt_userns, struct inode *inode)
+int may_write_xattr(struct mnt_idmap *idmap, struct inode *inode)
{
if (IS_IMMUTABLE(inode))
return -EPERM;
if (IS_APPEND(inode))
return -EPERM;
- if (HAS_UNMAPPED_ID(mnt_userns, inode))
+ if (HAS_UNMAPPED_ID(idmap, inode))
return -EPERM;
return 0;
}
@@ -110,13 +111,13 @@ int may_write_xattr(struct user_namespace *mnt_userns, struct inode *inode)
* because different namespaces have very different rules.
*/
static int
-xattr_permission(struct user_namespace *mnt_userns, struct inode *inode,
+xattr_permission(struct mnt_idmap *idmap, struct inode *inode,
const char *name, int mask)
{
if (mask & MAY_WRITE) {
int ret;
- ret = may_write_xattr(mnt_userns, inode);
+ ret = may_write_xattr(idmap, inode);
if (ret)
return ret;
}
@@ -148,11 +149,11 @@ xattr_permission(struct user_namespace *mnt_userns, struct inode *inode,
return (mask & MAY_WRITE) ? -EPERM : -ENODATA;
if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
(mask & MAY_WRITE) &&
- !inode_owner_or_capable(mnt_userns, inode))
+ !inode_owner_or_capable(idmap, inode))
return -EPERM;
}
- return inode_permission(mnt_userns, inode, mask);
+ return inode_permission(idmap, inode, mask);
}
/*
@@ -183,7 +184,7 @@ xattr_supported_namespace(struct inode *inode, const char *prefix)
EXPORT_SYMBOL(xattr_supported_namespace);
int
-__vfs_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+__vfs_setxattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct inode *inode, const char *name, const void *value,
size_t size, int flags)
{
@@ -199,7 +200,7 @@ __vfs_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
return -EOPNOTSUPP;
if (size == 0)
value = ""; /* empty EA, do not remove */
- return handler->set(handler, mnt_userns, dentry, inode, name, value,
+ return handler->set(handler, idmap, dentry, inode, name, value,
size, flags);
}
EXPORT_SYMBOL(__vfs_setxattr);
@@ -208,7 +209,7 @@ EXPORT_SYMBOL(__vfs_setxattr);
* __vfs_setxattr_noperm - perform setxattr operation without performing
* permission checks.
*
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @dentry: object to perform setxattr on
* @name: xattr name to set
* @value: value to set @name to
@@ -221,7 +222,7 @@ EXPORT_SYMBOL(__vfs_setxattr);
* is executed. It also assumes that the caller will make the appropriate
* permission checks.
*/
-int __vfs_setxattr_noperm(struct user_namespace *mnt_userns,
+int __vfs_setxattr_noperm(struct mnt_idmap *idmap,
struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
@@ -233,7 +234,7 @@ int __vfs_setxattr_noperm(struct user_namespace *mnt_userns,
if (issec)
inode->i_flags &= ~S_NOSEC;
if (inode->i_opflags & IOP_XATTR) {
- error = __vfs_setxattr(mnt_userns, dentry, inode, name, value,
+ error = __vfs_setxattr(idmap, dentry, inode, name, value,
size, flags);
if (!error) {
fsnotify_xattr(dentry);
@@ -264,7 +265,7 @@ int __vfs_setxattr_noperm(struct user_namespace *mnt_userns,
* __vfs_setxattr_locked - set an extended attribute while holding the inode
* lock
*
- * @mnt_userns: user namespace of the mount of the target inode
+ * @idmap: idmap of the mount of the target inode
* @dentry: object to perform setxattr on
* @name: xattr name to set
* @value: value to set @name to
@@ -274,18 +275,18 @@ int __vfs_setxattr_noperm(struct user_namespace *mnt_userns,
* a delegation was broken on, NULL if none.
*/
int
-__vfs_setxattr_locked(struct user_namespace *mnt_userns, struct dentry *dentry,
+__vfs_setxattr_locked(struct mnt_idmap *idmap, struct dentry *dentry,
const char *name, const void *value, size_t size,
int flags, struct inode **delegated_inode)
{
struct inode *inode = dentry->d_inode;
int error;
- error = xattr_permission(mnt_userns, inode, name, MAY_WRITE);
+ error = xattr_permission(idmap, inode, name, MAY_WRITE);
if (error)
return error;
- error = security_inode_setxattr(mnt_userns, dentry, name, value, size,
+ error = security_inode_setxattr(idmap, dentry, name, value, size,
flags);
if (error)
goto out;
@@ -294,7 +295,7 @@ __vfs_setxattr_locked(struct user_namespace *mnt_userns, struct dentry *dentry,
if (error)
goto out;
- error = __vfs_setxattr_noperm(mnt_userns, dentry, name, value,
+ error = __vfs_setxattr_noperm(idmap, dentry, name, value,
size, flags);
out:
@@ -303,7 +304,7 @@ out:
EXPORT_SYMBOL_GPL(__vfs_setxattr_locked);
int
-vfs_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+vfs_setxattr(struct mnt_idmap *idmap, struct dentry *dentry,
const char *name, const void *value, size_t size, int flags)
{
struct inode *inode = dentry->d_inode;
@@ -312,7 +313,7 @@ vfs_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
int error;
if (size && strcmp(name, XATTR_NAME_CAPS) == 0) {
- error = cap_convert_nscap(mnt_userns, dentry, &value, size);
+ error = cap_convert_nscap(idmap, dentry, &value, size);
if (error < 0)
return error;
size = error;
@@ -320,7 +321,7 @@ vfs_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
retry_deleg:
inode_lock(inode);
- error = __vfs_setxattr_locked(mnt_userns, dentry, name, value, size,
+ error = __vfs_setxattr_locked(idmap, dentry, name, value, size,
flags, &delegated_inode);
inode_unlock(inode);
@@ -337,19 +338,19 @@ retry_deleg:
EXPORT_SYMBOL_GPL(vfs_setxattr);
static ssize_t
-xattr_getsecurity(struct user_namespace *mnt_userns, struct inode *inode,
+xattr_getsecurity(struct mnt_idmap *idmap, struct inode *inode,
const char *name, void *value, size_t size)
{
void *buffer = NULL;
ssize_t len;
if (!value || !size) {
- len = security_inode_getsecurity(mnt_userns, inode, name,
+ len = security_inode_getsecurity(idmap, inode, name,
&buffer, false);
goto out_noalloc;
}
- len = security_inode_getsecurity(mnt_userns, inode, name, &buffer,
+ len = security_inode_getsecurity(idmap, inode, name, &buffer,
true);
if (len < 0)
return len;
@@ -374,7 +375,7 @@ out_noalloc:
* Returns the result of alloc, if failed, or the getxattr operation.
*/
int
-vfs_getxattr_alloc(struct user_namespace *mnt_userns, struct dentry *dentry,
+vfs_getxattr_alloc(struct mnt_idmap *idmap, struct dentry *dentry,
const char *name, char **xattr_value, size_t xattr_size,
gfp_t flags)
{
@@ -383,7 +384,7 @@ vfs_getxattr_alloc(struct user_namespace *mnt_userns, struct dentry *dentry,
char *value = *xattr_value;
int error;
- error = xattr_permission(mnt_userns, inode, name, MAY_READ);
+ error = xattr_permission(idmap, inode, name, MAY_READ);
if (error)
return error;
@@ -427,13 +428,13 @@ __vfs_getxattr(struct dentry *dentry, struct inode *inode, const char *name,
EXPORT_SYMBOL(__vfs_getxattr);
ssize_t
-vfs_getxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+vfs_getxattr(struct mnt_idmap *idmap, struct dentry *dentry,
const char *name, void *value, size_t size)
{
struct inode *inode = dentry->d_inode;
int error;
- error = xattr_permission(mnt_userns, inode, name, MAY_READ);
+ error = xattr_permission(idmap, inode, name, MAY_READ);
if (error)
return error;
@@ -444,7 +445,7 @@ vfs_getxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
if (!strncmp(name, XATTR_SECURITY_PREFIX,
XATTR_SECURITY_PREFIX_LEN)) {
const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
- int ret = xattr_getsecurity(mnt_userns, inode, suffix, value,
+ int ret = xattr_getsecurity(idmap, inode, suffix, value,
size);
/*
* Only overwrite the return value if a security module
@@ -480,7 +481,7 @@ vfs_listxattr(struct dentry *dentry, char *list, size_t size)
EXPORT_SYMBOL_GPL(vfs_listxattr);
int
-__vfs_removexattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+__vfs_removexattr(struct mnt_idmap *idmap, struct dentry *dentry,
const char *name)
{
struct inode *inode = d_inode(dentry);
@@ -494,7 +495,7 @@ __vfs_removexattr(struct user_namespace *mnt_userns, struct dentry *dentry,
return PTR_ERR(handler);
if (!handler->set)
return -EOPNOTSUPP;
- return handler->set(handler, mnt_userns, dentry, inode, name, NULL, 0,
+ return handler->set(handler, idmap, dentry, inode, name, NULL, 0,
XATTR_REPLACE);
}
EXPORT_SYMBOL(__vfs_removexattr);
@@ -503,25 +504,25 @@ EXPORT_SYMBOL(__vfs_removexattr);
* __vfs_removexattr_locked - set an extended attribute while holding the inode
* lock
*
- * @mnt_userns: user namespace of the mount of the target inode
+ * @idmap: idmap of the mount of the target inode
* @dentry: object to perform setxattr on
* @name: name of xattr to remove
* @delegated_inode: on return, will contain an inode pointer that
* a delegation was broken on, NULL if none.
*/
int
-__vfs_removexattr_locked(struct user_namespace *mnt_userns,
+__vfs_removexattr_locked(struct mnt_idmap *idmap,
struct dentry *dentry, const char *name,
struct inode **delegated_inode)
{
struct inode *inode = dentry->d_inode;
int error;
- error = xattr_permission(mnt_userns, inode, name, MAY_WRITE);
+ error = xattr_permission(idmap, inode, name, MAY_WRITE);
if (error)
return error;
- error = security_inode_removexattr(mnt_userns, dentry, name);
+ error = security_inode_removexattr(idmap, dentry, name);
if (error)
goto out;
@@ -529,7 +530,7 @@ __vfs_removexattr_locked(struct user_namespace *mnt_userns,
if (error)
goto out;
- error = __vfs_removexattr(mnt_userns, dentry, name);
+ error = __vfs_removexattr(idmap, dentry, name);
if (!error) {
fsnotify_xattr(dentry);
@@ -542,7 +543,7 @@ out:
EXPORT_SYMBOL_GPL(__vfs_removexattr_locked);
int
-vfs_removexattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+vfs_removexattr(struct mnt_idmap *idmap, struct dentry *dentry,
const char *name)
{
struct inode *inode = dentry->d_inode;
@@ -551,7 +552,7 @@ vfs_removexattr(struct user_namespace *mnt_userns, struct dentry *dentry,
retry_deleg:
inode_lock(inode);
- error = __vfs_removexattr_locked(mnt_userns, dentry,
+ error = __vfs_removexattr_locked(idmap, dentry,
name, &delegated_inode);
inode_unlock(inode);
@@ -605,7 +606,7 @@ int do_setxattr(struct mnt_idmap *idmap, struct dentry *dentry,
return do_set_acl(idmap, dentry, ctx->kname->name,
ctx->kvalue, ctx->size);
- return vfs_setxattr(mnt_idmap_owner(idmap), dentry, ctx->kname->name,
+ return vfs_setxattr(idmap, dentry, ctx->kname->name,
ctx->kvalue, ctx->size, ctx->flags);
}
@@ -714,8 +715,7 @@ do_getxattr(struct mnt_idmap *idmap, struct dentry *d,
if (is_posix_acl_xattr(ctx->kname->name))
error = do_get_acl(idmap, d, kname, ctx->kvalue, ctx->size);
else
- error = vfs_getxattr(mnt_idmap_owner(idmap), d, kname,
- ctx->kvalue, ctx->size);
+ error = vfs_getxattr(idmap, d, kname, ctx->kvalue, ctx->size);
if (error > 0) {
if (ctx->size && copy_to_user(ctx->value, ctx->kvalue, error))
error = -EFAULT;
@@ -892,9 +892,9 @@ removexattr(struct mnt_idmap *idmap, struct dentry *d,
return error;
if (is_posix_acl_xattr(kname))
- return vfs_remove_acl(mnt_idmap_owner(idmap), d, kname);
+ return vfs_remove_acl(idmap, d, kname);
- return vfs_removexattr(mnt_idmap_owner(idmap), d, kname);
+ return vfs_removexattr(idmap, d, kname);
}
static int path_removexattr(const char __user *pathname,
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 4c16c8c31fcb..35f574421670 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -4666,7 +4666,12 @@ xfs_btree_space_to_height(
const unsigned int *limits,
unsigned long long leaf_blocks)
{
- unsigned long long node_blocks = limits[1];
+ /*
+ * The root btree block can have fewer than minrecs pointers in it
+ * because the tree might not be big enough to require that amount of
+ * fanout. Hence it has a minimum size of 2 pointers, not limits[1].
+ */
+ unsigned long long node_blocks = 2;
unsigned long long blocks_left = leaf_blocks - 1;
unsigned int height = 1;
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index a05f44eb8178..791db7d9c849 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -242,7 +242,7 @@ xfs_acl_set_mode(
}
int
-xfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+xfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type)
{
umode_t mode;
@@ -258,7 +258,7 @@ xfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
return error;
if (type == ACL_TYPE_ACCESS) {
- error = posix_acl_update_mode(mnt_userns, inode, &mode, &acl);
+ error = posix_acl_update_mode(idmap, inode, &mode, &acl);
if (error)
return error;
set_mode = true;
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index dcd176149c7a..bf7f960997d3 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -11,7 +11,7 @@ struct posix_acl;
#ifdef CONFIG_XFS_POSIX_ACL
extern struct posix_acl *xfs_get_acl(struct inode *inode, int type, bool rcu);
-extern int xfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+extern int xfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type);
extern int __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
void xfs_forget_acl(struct inode *inode, const char *name);
diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c
index ad22a003f959..f3d328e4a440 100644
--- a/fs/xfs/xfs_extent_busy.c
+++ b/fs/xfs/xfs_extent_busy.c
@@ -236,6 +236,7 @@ xfs_extent_busy_update_extent(
*
*/
busyp->bno = fend;
+ busyp->length = bend - fend;
} else if (bbno < fbno) {
/*
* Case 8:
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 595a5bcf46b9..d06c0cc62f61 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1047,7 +1047,7 @@ xfs_file_fallocate(
iattr.ia_valid = ATTR_SIZE;
iattr.ia_size = new_size;
- error = xfs_vn_setattr_size(file_mnt_user_ns(file),
+ error = xfs_vn_setattr_size(file_mnt_idmap(file),
file_dentry(file), &iattr);
if (error)
goto out_unlock;
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index f35e2cee5265..ddeaccc04aec 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1853,12 +1853,20 @@ xfs_inodegc_worker(
struct xfs_inodegc, work);
struct llist_node *node = llist_del_all(&gc->list);
struct xfs_inode *ip, *n;
+ unsigned int nofs_flag;
WRITE_ONCE(gc->items, 0);
if (!node)
return;
+ /*
+ * We can allocate memory here while doing writeback on behalf of
+ * memory reclaim. To avoid memory allocation deadlocks set the
+ * task-wide nofs context for the following operations.
+ */
+ nofs_flag = memalloc_nofs_save();
+
ip = llist_entry(node, struct xfs_inode, i_gclist);
trace_xfs_inodegc_worker(ip->i_mount, READ_ONCE(gc->shrinker_hits));
@@ -1867,6 +1875,8 @@ xfs_inodegc_worker(
xfs_iflags_set(ip, XFS_INACTIVATING);
xfs_inodegc_inactivate(ip);
}
+
+ memalloc_nofs_restore(nofs_flag);
}
/*
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index d354ea2b74f9..7f1d715faab5 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -777,7 +777,7 @@ xfs_inode_inherit_flags2(
*/
int
xfs_init_new_inode(
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct xfs_trans *tp,
struct xfs_inode *pip,
xfs_ino_t ino,
@@ -823,11 +823,11 @@ xfs_init_new_inode(
ip->i_projid = prid;
if (dir && !(dir->i_mode & S_ISGID) && xfs_has_grpid(mp)) {
- inode_fsuid_set(inode, mnt_userns);
+ inode_fsuid_set(inode, idmap);
inode->i_gid = dir->i_gid;
inode->i_mode = mode;
} else {
- inode_init_owner(mnt_userns, inode, dir, mode);
+ inode_init_owner(idmap, inode, dir, mode);
}
/*
@@ -836,7 +836,7 @@ xfs_init_new_inode(
* (and only if the irix_sgid_inherit compatibility variable is set).
*/
if (irix_sgid_inherit && (inode->i_mode & S_ISGID) &&
- !vfsgid_in_group_p(i_gid_into_vfsgid(mnt_userns, inode)))
+ !vfsgid_in_group_p(i_gid_into_vfsgid(idmap, inode)))
inode->i_mode &= ~S_ISGID;
ip->i_disk_size = 0;
@@ -946,7 +946,7 @@ xfs_bumplink(
int
xfs_create(
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
xfs_inode_t *dp,
struct xfs_name *name,
umode_t mode,
@@ -978,8 +978,8 @@ xfs_create(
/*
* Make sure that we have allocated dquot(s) on disk.
*/
- error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns),
- mapped_fsgid(mnt_userns, &init_user_ns), prid,
+ error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns),
+ mapped_fsgid(idmap, &init_user_ns), prid,
XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
&udqp, &gdqp, &pdqp);
if (error)
@@ -1020,7 +1020,7 @@ xfs_create(
*/
error = xfs_dialloc(&tp, dp->i_ino, mode, &ino);
if (!error)
- error = xfs_init_new_inode(mnt_userns, tp, dp, ino, mode,
+ error = xfs_init_new_inode(idmap, tp, dp, ino, mode,
is_dir ? 2 : 1, rdev, prid, init_xattrs, &ip);
if (error)
goto out_trans_cancel;
@@ -1102,7 +1102,7 @@ xfs_create(
int
xfs_create_tmpfile(
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct xfs_inode *dp,
umode_t mode,
struct xfs_inode **ipp)
@@ -1127,8 +1127,8 @@ xfs_create_tmpfile(
/*
* Make sure that we have allocated dquot(s) on disk.
*/
- error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns),
- mapped_fsgid(mnt_userns, &init_user_ns), prid,
+ error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns),
+ mapped_fsgid(idmap, &init_user_ns), prid,
XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
&udqp, &gdqp, &pdqp);
if (error)
@@ -1144,7 +1144,7 @@ xfs_create_tmpfile(
error = xfs_dialloc(&tp, dp->i_ino, mode, &ino);
if (!error)
- error = xfs_init_new_inode(mnt_userns, tp, dp, ino, mode,
+ error = xfs_init_new_inode(idmap, tp, dp, ino, mode,
0, 0, prid, false, &ip);
if (error)
goto out_trans_cancel;
@@ -2709,7 +2709,7 @@ out_trans_abort:
*/
static int
xfs_rename_alloc_whiteout(
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct xfs_name *src_name,
struct xfs_inode *dp,
struct xfs_inode **wip)
@@ -2718,7 +2718,7 @@ xfs_rename_alloc_whiteout(
struct qstr name;
int error;
- error = xfs_create_tmpfile(mnt_userns, dp, S_IFCHR | WHITEOUT_MODE,
+ error = xfs_create_tmpfile(idmap, dp, S_IFCHR | WHITEOUT_MODE,
&tmpfile);
if (error)
return error;
@@ -2750,7 +2750,7 @@ xfs_rename_alloc_whiteout(
*/
int
xfs_rename(
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct xfs_inode *src_dp,
struct xfs_name *src_name,
struct xfs_inode *src_ip,
@@ -2782,7 +2782,7 @@ xfs_rename(
* appropriately.
*/
if (flags & RENAME_WHITEOUT) {
- error = xfs_rename_alloc_whiteout(mnt_userns, src_name,
+ error = xfs_rename_alloc_whiteout(idmap, src_name,
target_dp, &wip);
if (error)
return error;
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index fa780f08dc89..69d21e42c10a 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -473,18 +473,18 @@ int xfs_release(struct xfs_inode *ip);
void xfs_inactive(struct xfs_inode *ip);
int xfs_lookup(struct xfs_inode *dp, const struct xfs_name *name,
struct xfs_inode **ipp, struct xfs_name *ci_name);
-int xfs_create(struct user_namespace *mnt_userns,
+int xfs_create(struct mnt_idmap *idmap,
struct xfs_inode *dp, struct xfs_name *name,
umode_t mode, dev_t rdev, bool need_xattr,
struct xfs_inode **ipp);
-int xfs_create_tmpfile(struct user_namespace *mnt_userns,
+int xfs_create_tmpfile(struct mnt_idmap *idmap,
struct xfs_inode *dp, umode_t mode,
struct xfs_inode **ipp);
int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
struct xfs_inode *ip);
int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
struct xfs_name *target_name);
-int xfs_rename(struct user_namespace *mnt_userns,
+int xfs_rename(struct mnt_idmap *idmap,
struct xfs_inode *src_dp, struct xfs_name *src_name,
struct xfs_inode *src_ip, struct xfs_inode *target_dp,
struct xfs_name *target_name,
@@ -515,7 +515,7 @@ void xfs_lock_two_inodes(struct xfs_inode *ip0, uint ip0_mode,
xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip);
xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip);
-int xfs_init_new_inode(struct user_namespace *mnt_userns, struct xfs_trans *tp,
+int xfs_init_new_inode(struct mnt_idmap *idmap, struct xfs_trans *tp,
struct xfs_inode *pip, xfs_ino_t ino, umode_t mode,
xfs_nlink_t nlink, dev_t rdev, prid_t prid, bool init_xattrs,
struct xfs_inode **ipp);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 13f1b2add390..55bb01173cde 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -665,7 +665,7 @@ xfs_ioc_fsbulkstat(
struct xfs_fsop_bulkreq bulkreq;
struct xfs_ibulk breq = {
.mp = mp,
- .mnt_userns = file_mnt_user_ns(file),
+ .idmap = file_mnt_idmap(file),
.ocount = 0,
};
xfs_ino_t lastino;
@@ -754,7 +754,7 @@ xfs_bulkstat_fmt(
static int
xfs_bulk_ireq_setup(
struct xfs_mount *mp,
- struct xfs_bulk_ireq *hdr,
+ const struct xfs_bulk_ireq *hdr,
struct xfs_ibulk *breq,
void __user *ubuffer)
{
@@ -780,7 +780,7 @@ xfs_bulk_ireq_setup(
switch (hdr->ino) {
case XFS_BULK_IREQ_SPECIAL_ROOT:
- hdr->ino = mp->m_sb.sb_rootino;
+ breq->startino = mp->m_sb.sb_rootino;
break;
default:
return -EINVAL;
@@ -844,7 +844,7 @@ xfs_ioc_bulkstat(
struct xfs_bulk_ireq hdr;
struct xfs_ibulk breq = {
.mp = mp,
- .mnt_userns = file_mnt_user_ns(file),
+ .idmap = file_mnt_idmap(file),
};
int error;
@@ -1297,7 +1297,7 @@ xfs_ioctl_setattr_check_projid(
int
xfs_fileattr_set(
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *dentry,
struct fileattr *fa)
{
@@ -1371,7 +1371,7 @@ xfs_fileattr_set(
*/
if ((VFS_I(ip)->i_mode & (S_ISUID|S_ISGID)) &&
- !capable_wrt_inode_uidgid(mnt_userns, VFS_I(ip), CAP_FSETID))
+ !capable_wrt_inode_uidgid(idmap, VFS_I(ip), CAP_FSETID))
VFS_I(ip)->i_mode &= ~(S_ISUID|S_ISGID);
/* Change the ownerships and register project quota modifications */
diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h
index d4abba2c13c1..38be600b5e1e 100644
--- a/fs/xfs/xfs_ioctl.h
+++ b/fs/xfs/xfs_ioctl.h
@@ -49,7 +49,7 @@ xfs_fileattr_get(
extern int
xfs_fileattr_set(
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *dentry,
struct fileattr *fa);
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 2f54b701eead..ee35eea1ecce 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -204,7 +204,7 @@ xfs_compat_ioc_fsbulkstat(
struct xfs_fsop_bulkreq bulkreq;
struct xfs_ibulk breq = {
.mp = mp,
- .mnt_userns = file_mnt_user_ns(file),
+ .idmap = file_mnt_idmap(file),
.ocount = 0,
};
xfs_ino_t lastino;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 669c1bc5c3a7..fc1946f80a4a 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -83,7 +83,7 @@ xfs_iomap_valid(
return true;
}
-const struct iomap_page_ops xfs_iomap_page_ops = {
+static const struct iomap_page_ops xfs_iomap_page_ops = {
.iomap_valid = xfs_iomap_valid,
};
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 515318dfbc38..24718adb3c16 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -162,12 +162,12 @@ xfs_create_need_xattr(
STATIC int
xfs_generic_create(
- struct user_namespace *mnt_userns,
- struct inode *dir,
- struct dentry *dentry,
- umode_t mode,
- dev_t rdev,
- struct file *tmpfile) /* unnamed file */
+ struct mnt_idmap *idmap,
+ struct inode *dir,
+ struct dentry *dentry,
+ umode_t mode,
+ dev_t rdev,
+ struct file *tmpfile) /* unnamed file */
{
struct inode *inode;
struct xfs_inode *ip = NULL;
@@ -196,11 +196,11 @@ xfs_generic_create(
goto out_free_acl;
if (!tmpfile) {
- error = xfs_create(mnt_userns, XFS_I(dir), &name, mode, rdev,
+ error = xfs_create(idmap, XFS_I(dir), &name, mode, rdev,
xfs_create_need_xattr(dir, default_acl, acl),
&ip);
} else {
- error = xfs_create_tmpfile(mnt_userns, XFS_I(dir), mode, &ip);
+ error = xfs_create_tmpfile(idmap, XFS_I(dir), mode, &ip);
}
if (unlikely(error))
goto out_free_acl;
@@ -255,35 +255,34 @@ xfs_generic_create(
STATIC int
xfs_vn_mknod(
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct inode *dir,
struct dentry *dentry,
umode_t mode,
dev_t rdev)
{
- return xfs_generic_create(mnt_userns, dir, dentry, mode, rdev, NULL);
+ return xfs_generic_create(idmap, dir, dentry, mode, rdev, NULL);
}
STATIC int
xfs_vn_create(
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct inode *dir,
struct dentry *dentry,
umode_t mode,
bool flags)
{
- return xfs_generic_create(mnt_userns, dir, dentry, mode, 0, NULL);
+ return xfs_generic_create(idmap, dir, dentry, mode, 0, NULL);
}
STATIC int
xfs_vn_mkdir(
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct inode *dir,
struct dentry *dentry,
umode_t mode)
{
- return xfs_generic_create(mnt_userns, dir, dentry, mode | S_IFDIR, 0,
- NULL);
+ return xfs_generic_create(idmap, dir, dentry, mode | S_IFDIR, 0, NULL);
}
STATIC struct dentry *
@@ -400,7 +399,7 @@ xfs_vn_unlink(
STATIC int
xfs_vn_symlink(
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct inode *dir,
struct dentry *dentry,
const char *symname)
@@ -417,7 +416,7 @@ xfs_vn_symlink(
if (unlikely(error))
goto out;
- error = xfs_symlink(mnt_userns, XFS_I(dir), &name, symname, mode, &cip);
+ error = xfs_symlink(idmap, XFS_I(dir), &name, symname, mode, &cip);
if (unlikely(error))
goto out;
@@ -443,7 +442,7 @@ xfs_vn_symlink(
STATIC int
xfs_vn_rename(
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct inode *odir,
struct dentry *odentry,
struct inode *ndir,
@@ -472,7 +471,7 @@ xfs_vn_rename(
if (unlikely(error))
return error;
- return xfs_rename(mnt_userns, XFS_I(odir), &oname,
+ return xfs_rename(idmap, XFS_I(odir), &oname,
XFS_I(d_inode(odentry)), XFS_I(ndir), &nname,
new_inode ? XFS_I(new_inode) : NULL, flags);
}
@@ -549,7 +548,7 @@ xfs_stat_blksize(
STATIC int
xfs_vn_getattr(
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
const struct path *path,
struct kstat *stat,
u32 request_mask,
@@ -558,8 +557,8 @@ xfs_vn_getattr(
struct inode *inode = d_inode(path->dentry);
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
- vfsuid_t vfsuid = i_uid_into_vfsuid(mnt_userns, inode);
- vfsgid_t vfsgid = i_gid_into_vfsgid(mnt_userns, inode);
+ vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode);
+ vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode);
trace_xfs_getattr(ip);
@@ -627,7 +626,7 @@ xfs_vn_getattr(
static int
xfs_vn_change_ok(
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *dentry,
struct iattr *iattr)
{
@@ -639,7 +638,7 @@ xfs_vn_change_ok(
if (xfs_is_shutdown(mp))
return -EIO;
- return setattr_prepare(mnt_userns, dentry, iattr);
+ return setattr_prepare(idmap, dentry, iattr);
}
/*
@@ -650,7 +649,7 @@ xfs_vn_change_ok(
*/
static int
xfs_setattr_nonsize(
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *dentry,
struct xfs_inode *ip,
struct iattr *iattr)
@@ -679,14 +678,14 @@ xfs_setattr_nonsize(
uint qflags = 0;
if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
- uid = from_vfsuid(mnt_userns, i_user_ns(inode),
+ uid = from_vfsuid(idmap, i_user_ns(inode),
iattr->ia_vfsuid);
qflags |= XFS_QMOPT_UQUOTA;
} else {
uid = inode->i_uid;
}
if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
- gid = from_vfsgid(mnt_userns, i_user_ns(inode),
+ gid = from_vfsgid(idmap, i_user_ns(inode),
iattr->ia_vfsgid);
qflags |= XFS_QMOPT_GQUOTA;
} else {
@@ -719,18 +718,18 @@ xfs_setattr_nonsize(
* also.
*/
if (XFS_IS_UQUOTA_ON(mp) &&
- i_uid_needs_update(mnt_userns, iattr, inode)) {
+ i_uid_needs_update(idmap, iattr, inode)) {
ASSERT(udqp);
old_udqp = xfs_qm_vop_chown(tp, ip, &ip->i_udquot, udqp);
}
if (XFS_IS_GQUOTA_ON(mp) &&
- i_gid_needs_update(mnt_userns, iattr, inode)) {
+ i_gid_needs_update(idmap, iattr, inode)) {
ASSERT(xfs_has_pquotino(mp) || !XFS_IS_PQUOTA_ON(mp));
ASSERT(gdqp);
old_gdqp = xfs_qm_vop_chown(tp, ip, &ip->i_gdquot, gdqp);
}
- setattr_copy(mnt_userns, inode, iattr);
+ setattr_copy(idmap, inode, iattr);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
XFS_STATS_INC(mp, xs_ig_attrchg);
@@ -758,7 +757,7 @@ xfs_setattr_nonsize(
* Posix ACL code seems to care about this issue either.
*/
if (mask & ATTR_MODE) {
- error = posix_acl_chmod(mnt_userns, dentry, inode->i_mode);
+ error = posix_acl_chmod(idmap, dentry, inode->i_mode);
if (error)
return error;
}
@@ -779,7 +778,7 @@ out_dqrele:
*/
STATIC int
xfs_setattr_size(
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *dentry,
struct xfs_inode *ip,
struct iattr *iattr)
@@ -812,7 +811,7 @@ xfs_setattr_size(
* Use the regular setattr path to update the timestamps.
*/
iattr->ia_valid &= ~ATTR_SIZE;
- return xfs_setattr_nonsize(mnt_userns, dentry, ip, iattr);
+ return xfs_setattr_nonsize(idmap, dentry, ip, iattr);
}
/*
@@ -956,7 +955,7 @@ xfs_setattr_size(
}
ASSERT(!(iattr->ia_valid & (ATTR_UID | ATTR_GID)));
- setattr_copy(mnt_userns, inode, iattr);
+ setattr_copy(idmap, inode, iattr);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
XFS_STATS_INC(mp, xs_ig_attrchg);
@@ -977,7 +976,7 @@ out_trans_cancel:
int
xfs_vn_setattr_size(
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *dentry,
struct iattr *iattr)
{
@@ -986,15 +985,15 @@ xfs_vn_setattr_size(
trace_xfs_setattr(ip);
- error = xfs_vn_change_ok(mnt_userns, dentry, iattr);
+ error = xfs_vn_change_ok(idmap, dentry, iattr);
if (error)
return error;
- return xfs_setattr_size(mnt_userns, dentry, ip, iattr);
+ return xfs_setattr_size(idmap, dentry, ip, iattr);
}
STATIC int
xfs_vn_setattr(
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *dentry,
struct iattr *iattr)
{
@@ -1014,14 +1013,14 @@ xfs_vn_setattr(
return error;
}
- error = xfs_vn_setattr_size(mnt_userns, dentry, iattr);
+ error = xfs_vn_setattr_size(idmap, dentry, iattr);
xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
} else {
trace_xfs_setattr(ip);
- error = xfs_vn_change_ok(mnt_userns, dentry, iattr);
+ error = xfs_vn_change_ok(idmap, dentry, iattr);
if (!error)
- error = xfs_setattr_nonsize(mnt_userns, dentry, ip, iattr);
+ error = xfs_setattr_nonsize(idmap, dentry, ip, iattr);
}
return error;
@@ -1092,12 +1091,12 @@ xfs_vn_fiemap(
STATIC int
xfs_vn_tmpfile(
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct inode *dir,
struct file *file,
umode_t mode)
{
- int err = xfs_generic_create(mnt_userns, dir, file->f_path.dentry, mode, 0, file);
+ int err = xfs_generic_create(idmap, dir, file->f_path.dentry, mode, 0, file);
return finish_open_simple(file, err);
}
diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h
index e570dcb5df8d..7f84a0843b24 100644
--- a/fs/xfs/xfs_iops.h
+++ b/fs/xfs/xfs_iops.h
@@ -13,7 +13,7 @@ extern const struct file_operations xfs_dir_file_operations;
extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
-int xfs_vn_setattr_size(struct user_namespace *mnt_userns,
+int xfs_vn_setattr_size(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *vap);
int xfs_inode_init_security(struct inode *inode, struct inode *dir,
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index a1c2bcf65d37..f225413a993c 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -55,7 +55,7 @@ struct xfs_bstat_chunk {
STATIC int
xfs_bulkstat_one_int(
struct xfs_mount *mp,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct xfs_trans *tp,
xfs_ino_t ino,
struct xfs_bstat_chunk *bc)
@@ -83,8 +83,8 @@ xfs_bulkstat_one_int(
ASSERT(ip != NULL);
ASSERT(ip->i_imap.im_blkno != 0);
inode = VFS_I(ip);
- vfsuid = i_uid_into_vfsuid(mnt_userns, inode);
- vfsgid = i_gid_into_vfsgid(mnt_userns, inode);
+ vfsuid = i_uid_into_vfsuid(idmap, inode);
+ vfsgid = i_gid_into_vfsgid(idmap, inode);
/* xfs_iget returns the following without needing
* further change.
@@ -178,7 +178,7 @@ xfs_bulkstat_one(
struct xfs_trans *tp;
int error;
- if (breq->mnt_userns != &init_user_ns) {
+ if (breq->idmap != &nop_mnt_idmap) {
xfs_warn_ratelimited(breq->mp,
"bulkstat not supported inside of idmapped mounts.");
return -EINVAL;
@@ -199,7 +199,7 @@ xfs_bulkstat_one(
if (error)
goto out;
- error = xfs_bulkstat_one_int(breq->mp, breq->mnt_userns, tp,
+ error = xfs_bulkstat_one_int(breq->mp, breq->idmap, tp,
breq->startino, &bc);
xfs_trans_cancel(tp);
out:
@@ -225,7 +225,7 @@ xfs_bulkstat_iwalk(
struct xfs_bstat_chunk *bc = data;
int error;
- error = xfs_bulkstat_one_int(mp, bc->breq->mnt_userns, tp, ino, data);
+ error = xfs_bulkstat_one_int(mp, bc->breq->idmap, tp, ino, data);
/* bulkstat just skips over missing inodes */
if (error == -ENOENT || error == -EINVAL)
return 0;
@@ -270,7 +270,7 @@ xfs_bulkstat(
unsigned int iwalk_flags = 0;
int error;
- if (breq->mnt_userns != &init_user_ns) {
+ if (breq->idmap != &nop_mnt_idmap) {
xfs_warn_ratelimited(breq->mp,
"bulkstat not supported inside of idmapped mounts.");
return -EINVAL;
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index e2d0eba43f35..1659f13f17a8 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -8,7 +8,7 @@
/* In-memory representation of a userspace request for batch inode data. */
struct xfs_ibulk {
struct xfs_mount *mp;
- struct user_namespace *mnt_userns;
+ struct mnt_idmap *idmap;
void __user *ubuffer; /* user output buffer */
xfs_ino_t startino; /* start with this inode */
unsigned int icount; /* number of elements in ubuffer */
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index f9878021e7d0..e88f18f85e4b 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -34,6 +34,7 @@ typedef __u32 xfs_nlink_t;
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/file.h>
+#include <linux/filelock.h>
#include <linux/swap.h>
#include <linux/errno.h>
#include <linux/sched/signal.h>
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index 38d23f0e703a..23d16186e1a3 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -322,7 +322,7 @@ xfs_fs_commit_blocks(
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
ASSERT(!(iattr->ia_valid & (ATTR_UID | ATTR_GID)));
- setattr_copy(&init_user_ns, inode, iattr);
+ setattr_copy(&nop_mnt_idmap, inode, iattr);
if (update_isize) {
i_size_write(inode, iattr->ia_size);
ip->i_disk_size = iattr->ia_size;
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index ff53d40a2dae..7dc0db7f5a76 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -68,7 +68,7 @@ restart:
while (1) {
struct xfs_dquot *batch[XFS_DQ_LOOKUP_BATCH];
- int error = 0;
+ int error;
int i;
mutex_lock(&qi->qi_tree_lock);
@@ -787,7 +787,7 @@ xfs_qm_qino_alloc(
error = xfs_dialloc(&tp, 0, S_IFREG, &ino);
if (!error)
- error = xfs_init_new_inode(&init_user_ns, tp, NULL, ino,
+ error = xfs_init_new_inode(&nop_mnt_idmap, tp, NULL, ino,
S_IFREG, 1, 0, 0, false, ipp);
if (error) {
xfs_trans_cancel(tp);
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index fe46bce8cae6..5535778a98f9 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -416,8 +416,6 @@ xfs_reflink_fill_cow_hole(
goto convert;
}
- ASSERT(cmap->br_startoff > imap->br_startoff);
-
/* Allocate the entire reservation as unwritten blocks. */
nimaps = 1;
error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount,
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 8389f3ef88ef..85e433df6a3f 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -144,7 +144,7 @@ xfs_readlink(
int
xfs_symlink(
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct xfs_inode *dp,
struct xfs_name *link_name,
const char *target_path,
@@ -193,8 +193,8 @@ xfs_symlink(
/*
* Make sure that we have allocated dquot(s) on disk.
*/
- error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns),
- mapped_fsgid(mnt_userns, &init_user_ns), prid,
+ error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns),
+ mapped_fsgid(idmap, &init_user_ns), prid,
XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
&udqp, &gdqp, &pdqp);
if (error)
@@ -231,7 +231,7 @@ xfs_symlink(
*/
error = xfs_dialloc(&tp, dp->i_ino, S_IFLNK, &ino);
if (!error)
- error = xfs_init_new_inode(mnt_userns, tp, dp, ino,
+ error = xfs_init_new_inode(idmap, tp, dp, ino,
S_IFLNK | (mode & ~S_IFMT), 1, 0, prid,
false, &ip);
if (error)
diff --git a/fs/xfs/xfs_symlink.h b/fs/xfs/xfs_symlink.h
index 2586b7e393f3..d1ca1ce62a93 100644
--- a/fs/xfs/xfs_symlink.h
+++ b/fs/xfs/xfs_symlink.h
@@ -7,7 +7,7 @@
/* Kernel only symlink definitions */
-int xfs_symlink(struct user_namespace *mnt_userns, struct xfs_inode *dp,
+int xfs_symlink(struct mnt_idmap *idmap, struct xfs_inode *dp,
struct xfs_name *link_name, const char *target_path,
umode_t mode, struct xfs_inode **ipp);
int xfs_readlink_bmap_ilocked(struct xfs_inode *ip, char *link);
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index 10aa1fd39d2b..7b9a0ed1b11f 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -133,7 +133,7 @@ xfs_xattr_get(const struct xattr_handler *handler, struct dentry *unused,
static int
xfs_xattr_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns, struct dentry *unused,
+ struct mnt_idmap *idmap, struct dentry *unused,
struct inode *inode, const char *name, const void *value,
size_t size, int flags)
{
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 2c53fbb8d918..72ef97320b99 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -442,6 +442,10 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
data_size = zonefs_check_zone_condition(inode, zone,
false, false);
}
+ } else if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO &&
+ data_size > isize) {
+ /* Do not expose garbage data */
+ data_size = isize;
}
/*
@@ -600,7 +604,7 @@ unlock:
return ret;
}
-static int zonefs_inode_setattr(struct user_namespace *mnt_userns,
+static int zonefs_inode_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *iattr)
{
struct inode *inode = d_inode(dentry);
@@ -609,7 +613,7 @@ static int zonefs_inode_setattr(struct user_namespace *mnt_userns,
if (unlikely(IS_IMMUTABLE(inode)))
return -EPERM;
- ret = setattr_prepare(&init_user_ns, dentry, iattr);
+ ret = setattr_prepare(&nop_mnt_idmap, dentry, iattr);
if (ret)
return ret;
@@ -626,7 +630,7 @@ static int zonefs_inode_setattr(struct user_namespace *mnt_userns,
!uid_eq(iattr->ia_uid, inode->i_uid)) ||
((iattr->ia_valid & ATTR_GID) &&
!gid_eq(iattr->ia_gid, inode->i_gid))) {
- ret = dquot_transfer(mnt_userns, inode, iattr);
+ ret = dquot_transfer(&nop_mnt_idmap, inode, iattr);
if (ret)
return ret;
}
@@ -637,7 +641,7 @@ static int zonefs_inode_setattr(struct user_namespace *mnt_userns,
return ret;
}
- setattr_copy(&init_user_ns, inode, iattr);
+ setattr_copy(&nop_mnt_idmap, inode, iattr);
return 0;
}
@@ -805,6 +809,24 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
ret = submit_bio_wait(bio);
+ /*
+ * If the file zone was written underneath the file system, the zone
+ * write pointer may not be where we expect it to be, but the zone
+ * append write can still succeed. So check manually that we wrote where
+ * we intended to, that is, at zi->i_wpoffset.
+ */
+ if (!ret) {
+ sector_t wpsector =
+ zi->i_zsector + (zi->i_wpoffset >> SECTOR_SHIFT);
+
+ if (bio->bi_iter.bi_sector != wpsector) {
+ zonefs_warn(inode->i_sb,
+ "Corrupted write pointer %llu for zone at %llu\n",
+ wpsector, zi->i_zsector);
+ ret = -EIO;
+ }
+ }
+
zonefs_file_write_dio_end_io(iocb, size, ret, 0);
trace_zonefs_file_dio_append(inode, size, ret);
@@ -1405,7 +1427,7 @@ static void zonefs_init_dir_inode(struct inode *parent, struct inode *inode,
struct super_block *sb = parent->i_sb;
inode->i_ino = bdev_nr_zones(sb->s_bdev) + type + 1;
- inode_init_owner(&init_user_ns, inode, parent, S_IFDIR | 0555);
+ inode_init_owner(&nop_mnt_idmap, inode, parent, S_IFDIR | 0555);
inode->i_op = &zonefs_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
set_nlink(inode, 2);
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index cd3b75e08ec3..e44be31115a6 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -230,7 +230,8 @@ struct acpi_pnp_type {
u32 hardware_id:1;
u32 bus_address:1;
u32 platform_id:1;
- u32 reserved:29;
+ u32 backlight:1;
+ u32 reserved:28;
};
struct acpi_device_pnp {
diff --git a/include/acpi/actbl3.h b/include/acpi/actbl3.h
index 7b9571e00cc4..832c6464f063 100644
--- a/include/acpi/actbl3.h
+++ b/include/acpi/actbl3.h
@@ -443,6 +443,7 @@ struct acpi_tpm2_phy {
#define ACPI_TPM2_RESERVED10 10
#define ACPI_TPM2_COMMAND_BUFFER_WITH_ARM_SMC 11 /* V1.2 Rev 8 */
#define ACPI_TPM2_RESERVED 12
+#define ACPI_TPM2_COMMAND_BUFFER_WITH_PLUTON 13
/* Optional trailer appears after any start_method subtables */
diff --git a/include/drm/drm_client.h b/include/drm/drm_client.h
index 4fc8018eddda..1220d185c776 100644
--- a/include/drm/drm_client.h
+++ b/include/drm/drm_client.h
@@ -127,11 +127,6 @@ struct drm_client_buffer {
struct drm_client_dev *client;
/**
- * @handle: Buffer handle
- */
- u32 handle;
-
- /**
* @pitch: Buffer pitch
*/
u32 pitch;
diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h
index b111dc7ada78..095370ef029d 100644
--- a/include/drm/drm_fb_helper.h
+++ b/include/drm/drm_fb_helper.h
@@ -208,6 +208,18 @@ struct drm_fb_helper {
* the smem_start field should always be cleared to zero.
*/
bool hint_leak_smem_start;
+
+#ifdef CONFIG_FB_DEFERRED_IO
+ /**
+ * @fbdefio:
+ *
+ * Temporary storage for the driver's FB deferred I/O handler. If the
+ * driver uses the DRM fbdev emulation layer, this is set by the core
+ * to a generic deferred I/O handler if a driver is preferring to use
+ * a shadow buffer.
+ */
+ struct fb_deferred_io fbdefio;
+#endif
};
static inline struct drm_fb_helper *
diff --git a/include/drm/drm_plane_helper.h b/include/drm/drm_plane_helper.h
index ff83d2621687..3a574e8cd22f 100644
--- a/include/drm/drm_plane_helper.h
+++ b/include/drm/drm_plane_helper.h
@@ -26,6 +26,7 @@
#include <linux/types.h>
+struct drm_atomic_state;
struct drm_crtc;
struct drm_framebuffer;
struct drm_modeset_acquire_ctx;
diff --git a/include/drm/drm_vma_manager.h b/include/drm/drm_vma_manager.h
index 4f8c35206f7c..6c2a2f21dbf0 100644
--- a/include/drm/drm_vma_manager.h
+++ b/include/drm/drm_vma_manager.h
@@ -74,6 +74,7 @@ void drm_vma_offset_remove(struct drm_vma_offset_manager *mgr,
struct drm_vma_offset_node *node);
int drm_vma_node_allow(struct drm_vma_offset_node *node, struct drm_file *tag);
+int drm_vma_node_allow_once(struct drm_vma_offset_node *node, struct drm_file *tag);
void drm_vma_node_revoke(struct drm_vma_offset_node *node,
struct drm_file *tag);
bool drm_vma_node_is_allowed(struct drm_vma_offset_node *node,
diff --git a/include/kunit/test.h b/include/kunit/test.h
index 87ea90576b50..08d3559dd703 100644
--- a/include/kunit/test.h
+++ b/include/kunit/test.h
@@ -303,7 +303,6 @@ static inline int kunit_run_all_tests(void)
*/
#define kunit_test_init_section_suites(__suites...) \
__kunit_test_suites(CONCATENATE(__UNIQUE_ID(array), _probe), \
- CONCATENATE(__UNIQUE_ID(suites), _probe), \
##__suites)
#define kunit_test_init_section_suite(suite) \
@@ -683,8 +682,9 @@ do { \
.right_text = #right, \
}; \
\
- if (likely(memcmp(__left, __right, __size) op 0)) \
- break; \
+ if (likely(__left && __right)) \
+ if (likely(memcmp(__left, __right, __size) op 0)) \
+ break; \
\
_KUNIT_FAILED(test, \
assert_type, \
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 9270cd87da3f..6470f67e63c4 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -263,7 +263,7 @@ struct vgic_dist {
struct vgic_io_device dist_iodev;
bool has_its;
- bool save_its_tables_in_progress;
+ bool table_write_in_progress;
/*
* Contains the attributes and gpa of the LPI configuration table.
diff --git a/include/linux/apple-gmux.h b/include/linux/apple-gmux.h
index ddb10aa67b14..1f68b49bcd68 100644
--- a/include/linux/apple-gmux.h
+++ b/include/linux/apple-gmux.h
@@ -8,18 +8,118 @@
#define LINUX_APPLE_GMUX_H
#include <linux/acpi.h>
+#include <linux/io.h>
+#include <linux/pnp.h>
#define GMUX_ACPI_HID "APP000B"
+/*
+ * gmux port offsets. Many of these are not yet used, but may be in the
+ * future, and it's useful to have them documented here anyhow.
+ */
+#define GMUX_PORT_VERSION_MAJOR 0x04
+#define GMUX_PORT_VERSION_MINOR 0x05
+#define GMUX_PORT_VERSION_RELEASE 0x06
+#define GMUX_PORT_SWITCH_DISPLAY 0x10
+#define GMUX_PORT_SWITCH_GET_DISPLAY 0x11
+#define GMUX_PORT_INTERRUPT_ENABLE 0x14
+#define GMUX_PORT_INTERRUPT_STATUS 0x16
+#define GMUX_PORT_SWITCH_DDC 0x28
+#define GMUX_PORT_SWITCH_EXTERNAL 0x40
+#define GMUX_PORT_SWITCH_GET_EXTERNAL 0x41
+#define GMUX_PORT_DISCRETE_POWER 0x50
+#define GMUX_PORT_MAX_BRIGHTNESS 0x70
+#define GMUX_PORT_BRIGHTNESS 0x74
+#define GMUX_PORT_VALUE 0xc2
+#define GMUX_PORT_READ 0xd0
+#define GMUX_PORT_WRITE 0xd4
+
+#define GMUX_MIN_IO_LEN (GMUX_PORT_BRIGHTNESS + 4)
+
#if IS_ENABLED(CONFIG_APPLE_GMUX)
+static inline bool apple_gmux_is_indexed(unsigned long iostart)
+{
+ u16 val;
+
+ outb(0xaa, iostart + 0xcc);
+ outb(0x55, iostart + 0xcd);
+ outb(0x00, iostart + 0xce);
+
+ val = inb(iostart + 0xcc) | (inb(iostart + 0xcd) << 8);
+ if (val == 0x55aa)
+ return true;
+
+ return false;
+}
/**
- * apple_gmux_present() - detect if gmux is built into the machine
+ * apple_gmux_detect() - detect if gmux is built into the machine
+ *
+ * @pnp_dev: Device to probe or NULL to use the first matching device
+ * @indexed_ret: Returns (by reference) if the gmux is indexed or not
+ *
+ * Detect if a supported gmux device is present by actually probing it.
+ * This avoids the false positives returned on some models by
+ * apple_gmux_present().
+ *
+ * Return: %true if a supported gmux ACPI device is detected and the kernel
+ * was configured with CONFIG_APPLE_GMUX, %false otherwise.
+ */
+static inline bool apple_gmux_detect(struct pnp_dev *pnp_dev, bool *indexed_ret)
+{
+ u8 ver_major, ver_minor, ver_release;
+ struct device *dev = NULL;
+ struct acpi_device *adev;
+ struct resource *res;
+ bool indexed = false;
+ bool ret = false;
+
+ if (!pnp_dev) {
+ adev = acpi_dev_get_first_match_dev(GMUX_ACPI_HID, NULL, -1);
+ if (!adev)
+ return false;
+
+ dev = get_device(acpi_get_first_physical_node(adev));
+ acpi_dev_put(adev);
+ if (!dev)
+ return false;
+
+ pnp_dev = to_pnp_dev(dev);
+ }
+
+ res = pnp_get_resource(pnp_dev, IORESOURCE_IO, 0);
+ if (!res || resource_size(res) < GMUX_MIN_IO_LEN)
+ goto out;
+
+ /*
+ * Invalid version information may indicate either that the gmux
+ * device isn't present or that it's a new one that uses indexed io.
+ */
+ ver_major = inb(res->start + GMUX_PORT_VERSION_MAJOR);
+ ver_minor = inb(res->start + GMUX_PORT_VERSION_MINOR);
+ ver_release = inb(res->start + GMUX_PORT_VERSION_RELEASE);
+ if (ver_major == 0xff && ver_minor == 0xff && ver_release == 0xff) {
+ indexed = apple_gmux_is_indexed(res->start);
+ if (!indexed)
+ goto out;
+ }
+
+ if (indexed_ret)
+ *indexed_ret = indexed;
+
+ ret = true;
+out:
+ put_device(dev);
+ return ret;
+}
+
+/**
+ * apple_gmux_present() - check if gmux ACPI device is present
*
* Drivers may use this to activate quirks specific to dual GPU MacBook Pros
* and Mac Pros, e.g. for deferred probing, runtime pm and backlight.
*
- * Return: %true if gmux is present and the kernel was configured
+ * Return: %true if gmux ACPI device is present and the kernel was configured
* with CONFIG_APPLE_GMUX, %false otherwise.
*/
static inline bool apple_gmux_present(void)
@@ -34,6 +134,11 @@ static inline bool apple_gmux_present(void)
return false;
}
+static inline bool apple_gmux_detect(struct pnp_dev *pnp_dev, bool *indexed_ret)
+{
+ return false;
+}
+
#endif /* !CONFIG_APPLE_GMUX */
#endif /* LINUX_APPLE_GMUX_H */
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 3608992848d3..31086a72e32a 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -14,6 +14,7 @@
#include <linux/audit_arch.h>
#include <uapi/linux/audit.h>
#include <uapi/linux/netfilter/nf_tables.h>
+#include <uapi/linux/fanotify.h>
#define AUDIT_INO_UNSET ((unsigned long)-1)
#define AUDIT_DEV_UNSET ((dev_t)-1)
@@ -416,7 +417,7 @@ extern void __audit_log_capset(const struct cred *new, const struct cred *old);
extern void __audit_mmap_fd(int fd, int flags);
extern void __audit_openat2_how(struct open_how *how);
extern void __audit_log_kern_module(char *name);
-extern void __audit_fanotify(unsigned int response);
+extern void __audit_fanotify(u32 response, struct fanotify_response_info_audit_rule *friar);
extern void __audit_tk_injoffset(struct timespec64 offset);
extern void __audit_ntp_log(const struct audit_ntp_data *ad);
extern void __audit_log_nfcfg(const char *name, u8 af, unsigned int nentries,
@@ -523,10 +524,10 @@ static inline void audit_log_kern_module(char *name)
__audit_log_kern_module(name);
}
-static inline void audit_fanotify(unsigned int response)
+static inline void audit_fanotify(u32 response, struct fanotify_response_info_audit_rule *friar)
{
if (!audit_dummy_context())
- __audit_fanotify(response);
+ __audit_fanotify(response, friar);
}
static inline void audit_tk_injoffset(struct timespec64 offset)
@@ -679,7 +680,7 @@ static inline void audit_log_kern_module(char *name)
{
}
-static inline void audit_fanotify(unsigned int response)
+static inline void audit_fanotify(u32 response, struct fanotify_response_info_audit_rule *friar)
{ }
static inline void audit_tk_injoffset(struct timespec64 offset)
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 22078a28d7cb..d766be7152e1 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -12,6 +12,8 @@
#define BIO_MAX_VECS 256U
+struct queue_limits;
+
static inline unsigned int bio_max_segs(unsigned int nr_segs)
{
return min(nr_segs, BIO_MAX_VECS);
@@ -375,6 +377,8 @@ static inline void bip_set_seed(struct bio_integrity_payload *bip,
void bio_trim(struct bio *bio, sector_t offset, sector_t size);
extern struct bio *bio_split(struct bio *bio, int sectors,
gfp_t gfp, struct bio_set *bs);
+struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
+ unsigned *segs, struct bio_set *bs, unsigned max_bytes);
/**
* bio_next_split - get next @sectors from a bio, splitting if necessary
@@ -475,6 +479,8 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty);
extern void bio_set_pages_dirty(struct bio *bio);
extern void bio_check_pages_dirty(struct bio *bio);
+extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
+ struct bio *src, struct bvec_iter *src_iter);
extern void bio_copy_data(struct bio *dst, struct bio *src);
extern void bio_free_pages(struct bio *bio);
void guard_bio_eod(struct bio *bio);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 301cf1cf4f2f..43d4e073b111 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1395,6 +1395,7 @@ struct block_device_operations {
void (*swap_slot_free_notify) (struct block_device *, unsigned long);
int (*report_zones)(struct gendisk *, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
+ char *(*devnode)(struct gendisk *disk, umode_t *mode);
/* returns the length of the identifier or a negative errno: */
int (*get_unique_id)(struct gendisk *disk, u8 id[16],
enum blk_unique_id id_type);
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 3de24cfb7a3d..634d37a599fa 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1832,7 +1832,7 @@ void bpf_prog_inc(struct bpf_prog *prog);
struct bpf_prog * __must_check bpf_prog_inc_not_zero(struct bpf_prog *prog);
void bpf_prog_put(struct bpf_prog *prog);
-void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock);
+void bpf_prog_free_id(struct bpf_prog *prog);
void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock);
struct btf_field *btf_record_find(const struct btf_record *rec,
diff --git a/include/linux/capability.h b/include/linux/capability.h
index 65efb74c3585..03c2a613ad40 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -42,6 +42,7 @@ struct inode;
struct dentry;
struct task_struct;
struct user_namespace;
+struct mnt_idmap;
extern const kernel_cap_t __cap_empty_set;
extern const kernel_cap_t __cap_init_eff_set;
@@ -248,9 +249,9 @@ static inline bool ns_capable_setid(struct user_namespace *ns, int cap)
}
#endif /* CONFIG_MULTIUSER */
bool privileged_wrt_inode_uidgid(struct user_namespace *ns,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
const struct inode *inode);
-bool capable_wrt_inode_uidgid(struct user_namespace *mnt_userns,
+bool capable_wrt_inode_uidgid(struct mnt_idmap *idmap,
const struct inode *inode, int cap);
extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap);
extern bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns);
@@ -271,11 +272,11 @@ static inline bool checkpoint_restore_ns_capable(struct user_namespace *ns)
}
/* audit system wants to get cap info from files as well */
-int get_vfs_caps_from_disk(struct user_namespace *mnt_userns,
+int get_vfs_caps_from_disk(struct mnt_idmap *idmap,
const struct dentry *dentry,
struct cpu_vfs_cap_data *cpu_caps);
-int cap_convert_nscap(struct user_namespace *mnt_userns, struct dentry *dentry,
+int cap_convert_nscap(struct mnt_idmap *idmap, struct dentry *dentry,
const void **ivalue, size_t size);
#endif /* !_LINUX_CAPABILITY_H */
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 00af2c98da75..4497d0a6772c 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -99,16 +99,6 @@ struct ceph_options {
#define CEPH_AUTH_NAME_DEFAULT "guest"
-/* mount state */
-enum {
- CEPH_MOUNT_MOUNTING,
- CEPH_MOUNT_MOUNTED,
- CEPH_MOUNT_UNMOUNTING,
- CEPH_MOUNT_UNMOUNTED,
- CEPH_MOUNT_SHUTDOWN,
- CEPH_MOUNT_RECOVER,
-};
-
static inline unsigned long ceph_timeout_jiffies(unsigned long timeout)
{
return timeout ?: MAX_SCHEDULE_TIMEOUT;
diff --git a/include/linux/dsa/tag_qca.h b/include/linux/dsa/tag_qca.h
index b1b5720d89a5..ee657452f122 100644
--- a/include/linux/dsa/tag_qca.h
+++ b/include/linux/dsa/tag_qca.h
@@ -45,8 +45,8 @@ struct sk_buff;
QCA_HDR_MGMT_COMMAND_LEN + \
QCA_HDR_MGMT_DATA1_LEN)
-#define QCA_HDR_MGMT_DATA2_LEN 12 /* Other 12 byte for the mdio data */
-#define QCA_HDR_MGMT_PADDING_LEN 34 /* Padding to reach the min Ethernet packet */
+#define QCA_HDR_MGMT_DATA2_LEN 28 /* Other 28 byte for the mdio data */
+#define QCA_HDR_MGMT_PADDING_LEN 18 /* Padding to reach the min Ethernet packet */
#define QCA_HDR_MGMT_PKT_LEN (QCA_HDR_MGMT_HEADER_LEN + \
QCA_HDR_LEN + \
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 4b27519143f5..98598bd1d2fa 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -668,7 +668,8 @@ extern struct efi {
#define EFI_RT_SUPPORTED_ALL 0x3fff
-#define EFI_RT_SUPPORTED_TIME_SERVICES 0x000f
+#define EFI_RT_SUPPORTED_TIME_SERVICES 0x0003
+#define EFI_RT_SUPPORTED_WAKEUP_SERVICES 0x000c
#define EFI_RT_SUPPORTED_VARIABLE_SERVICES 0x0070
extern struct mm_struct efi_mm;
diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h
index 9ec81290e3c8..bd5560542c79 100644
--- a/include/linux/elfcore.h
+++ b/include/linux/elfcore.h
@@ -105,14 +105,14 @@ int elf_core_copy_task_fpregs(struct task_struct *t, elf_fpregset_t *fpu);
* Dumping its extra ELF program headers includes all the other information
* a debugger needs to easily find how the gate DSO was being used.
*/
-extern Elf_Half elf_core_extra_phdrs(void);
+extern Elf_Half elf_core_extra_phdrs(struct coredump_params *cprm);
extern int
elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset);
extern int
elf_core_write_extra_data(struct coredump_params *cprm);
-extern size_t elf_core_extra_data_size(void);
+extern size_t elf_core_extra_data_size(struct coredump_params *cprm);
#else
-static inline Elf_Half elf_core_extra_phdrs(void)
+static inline Elf_Half elf_core_extra_phdrs(struct coredump_params *cprm)
{
return 0;
}
@@ -127,7 +127,7 @@ static inline int elf_core_write_extra_data(struct coredump_params *cprm)
return 1;
}
-static inline size_t elf_core_extra_data_size(void)
+static inline size_t elf_core_extra_data_size(struct coredump_params *cprm)
{
return 0;
}
diff --git a/include/linux/evm.h b/include/linux/evm.h
index 7a9ee2157f69..7dc1ee74169f 100644
--- a/include/linux/evm.h
+++ b/include/linux/evm.h
@@ -21,34 +21,34 @@ extern enum integrity_status evm_verifyxattr(struct dentry *dentry,
void *xattr_value,
size_t xattr_value_len,
struct integrity_iint_cache *iint);
-extern int evm_inode_setattr(struct user_namespace *mnt_userns,
+extern int evm_inode_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *attr);
extern void evm_inode_post_setattr(struct dentry *dentry, int ia_valid);
-extern int evm_inode_setxattr(struct user_namespace *mnt_userns,
+extern int evm_inode_setxattr(struct mnt_idmap *idmap,
struct dentry *dentry, const char *name,
const void *value, size_t size);
extern void evm_inode_post_setxattr(struct dentry *dentry,
const char *xattr_name,
const void *xattr_value,
size_t xattr_value_len);
-extern int evm_inode_removexattr(struct user_namespace *mnt_userns,
+extern int evm_inode_removexattr(struct mnt_idmap *idmap,
struct dentry *dentry, const char *xattr_name);
extern void evm_inode_post_removexattr(struct dentry *dentry,
const char *xattr_name);
-static inline void evm_inode_post_remove_acl(struct user_namespace *mnt_userns,
+static inline void evm_inode_post_remove_acl(struct mnt_idmap *idmap,
struct dentry *dentry,
const char *acl_name)
{
evm_inode_post_removexattr(dentry, acl_name);
}
-extern int evm_inode_set_acl(struct user_namespace *mnt_userns,
+extern int evm_inode_set_acl(struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name,
struct posix_acl *kacl);
-static inline int evm_inode_remove_acl(struct user_namespace *mnt_userns,
+static inline int evm_inode_remove_acl(struct mnt_idmap *idmap,
struct dentry *dentry,
const char *acl_name)
{
- return evm_inode_set_acl(mnt_userns, dentry, acl_name, NULL);
+ return evm_inode_set_acl(idmap, dentry, acl_name, NULL);
}
static inline void evm_inode_post_set_acl(struct dentry *dentry,
const char *acl_name,
@@ -90,7 +90,7 @@ static inline enum integrity_status evm_verifyxattr(struct dentry *dentry,
}
#endif
-static inline int evm_inode_setattr(struct user_namespace *mnt_userns,
+static inline int evm_inode_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *attr)
{
return 0;
@@ -101,7 +101,7 @@ static inline void evm_inode_post_setattr(struct dentry *dentry, int ia_valid)
return;
}
-static inline int evm_inode_setxattr(struct user_namespace *mnt_userns,
+static inline int evm_inode_setxattr(struct mnt_idmap *idmap,
struct dentry *dentry, const char *name,
const void *value, size_t size)
{
@@ -116,7 +116,7 @@ static inline void evm_inode_post_setxattr(struct dentry *dentry,
return;
}
-static inline int evm_inode_removexattr(struct user_namespace *mnt_userns,
+static inline int evm_inode_removexattr(struct mnt_idmap *idmap,
struct dentry *dentry,
const char *xattr_name)
{
@@ -129,21 +129,21 @@ static inline void evm_inode_post_removexattr(struct dentry *dentry,
return;
}
-static inline void evm_inode_post_remove_acl(struct user_namespace *mnt_userns,
+static inline void evm_inode_post_remove_acl(struct mnt_idmap *idmap,
struct dentry *dentry,
const char *acl_name)
{
return;
}
-static inline int evm_inode_set_acl(struct user_namespace *mnt_userns,
+static inline int evm_inode_set_acl(struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name,
struct posix_acl *kacl)
{
return 0;
}
-static inline int evm_inode_remove_acl(struct user_namespace *mnt_userns,
+static inline int evm_inode_remove_acl(struct mnt_idmap *idmap,
struct dentry *dentry,
const char *acl_name)
{
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index fe848901fcc3..9f4d4bcbf251 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -213,7 +213,6 @@ struct export_operations {
bool write, u32 *device_generation);
int (*commit_blocks)(struct inode *inode, struct iomap *iomaps,
int nr_iomaps, struct iattr *iattr);
- u64 (*fetch_iversion)(struct inode *);
#define EXPORT_OP_NOWCC (0x1) /* don't collect v3 wcc data */
#define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */
#define EXPORT_OP_CLOSE_BEFORE_UNLINK (0x4) /* close files before unlink */
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index 8ad743def6f3..4f1c4f603118 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -122,6 +122,11 @@
#define ALL_FANOTIFY_EVENT_BITS (FANOTIFY_OUTGOING_EVENTS | \
FANOTIFY_EVENT_FLAGS)
+/* These masks check for invalid bits in permission responses. */
+#define FANOTIFY_RESPONSE_ACCESS (FAN_ALLOW | FAN_DENY)
+#define FANOTIFY_RESPONSE_FLAGS (FAN_AUDIT | FAN_INFO)
+#define FANOTIFY_RESPONSE_VALID_MASK (FANOTIFY_RESPONSE_ACCESS | FANOTIFY_RESPONSE_FLAGS)
+
/* Do not use these old uapi constants internally */
#undef FAN_ALL_CLASS_BITS
#undef FAN_ALL_INIT_FLAGS
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 96b96323e9cb..73eb1f85ea8e 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -662,6 +662,7 @@ extern int fb_deferred_io_init(struct fb_info *info);
extern void fb_deferred_io_open(struct fb_info *info,
struct inode *inode,
struct file *file);
+extern void fb_deferred_io_release(struct fb_info *info);
extern void fb_deferred_io_cleanup(struct fb_info *info);
extern int fb_deferred_io_fsync(struct file *file, loff_t start,
loff_t end, int datasync);
diff --git a/include/linux/fileattr.h b/include/linux/fileattr.h
index 9e37e063ac69..47c05a9851d0 100644
--- a/include/linux/fileattr.h
+++ b/include/linux/fileattr.h
@@ -53,7 +53,7 @@ static inline bool fileattr_has_fsx(const struct fileattr *fa)
}
int vfs_fileattr_get(struct dentry *dentry, struct fileattr *fa);
-int vfs_fileattr_set(struct user_namespace *mnt_userns, struct dentry *dentry,
+int vfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry,
struct fileattr *fa);
#endif /* _LINUX_FILEATTR_H */
diff --git a/include/linux/filelock.h b/include/linux/filelock.h
new file mode 100644
index 000000000000..efcdd1631d9b
--- /dev/null
+++ b/include/linux/filelock.h
@@ -0,0 +1,439 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_FILELOCK_H
+#define _LINUX_FILELOCK_H
+
+#include <linux/fs.h>
+
+#define FL_POSIX 1
+#define FL_FLOCK 2
+#define FL_DELEG 4 /* NFSv4 delegation */
+#define FL_ACCESS 8 /* not trying to lock, just looking */
+#define FL_EXISTS 16 /* when unlocking, test for existence */
+#define FL_LEASE 32 /* lease held on this file */
+#define FL_CLOSE 64 /* unlock on close */
+#define FL_SLEEP 128 /* A blocking lock */
+#define FL_DOWNGRADE_PENDING 256 /* Lease is being downgraded */
+#define FL_UNLOCK_PENDING 512 /* Lease is being broken */
+#define FL_OFDLCK 1024 /* lock is "owned" by struct file */
+#define FL_LAYOUT 2048 /* outstanding pNFS layout */
+#define FL_RECLAIM 4096 /* reclaiming from a reboot server */
+
+#define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE)
+
+/*
+ * Special return value from posix_lock_file() and vfs_lock_file() for
+ * asynchronous locking.
+ */
+#define FILE_LOCK_DEFERRED 1
+
+struct file_lock;
+
+struct file_lock_operations {
+ void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
+ void (*fl_release_private)(struct file_lock *);
+};
+
+struct lock_manager_operations {
+ void *lm_mod_owner;
+ fl_owner_t (*lm_get_owner)(fl_owner_t);
+ void (*lm_put_owner)(fl_owner_t);
+ void (*lm_notify)(struct file_lock *); /* unblock callback */
+ int (*lm_grant)(struct file_lock *, int);
+ bool (*lm_break)(struct file_lock *);
+ int (*lm_change)(struct file_lock *, int, struct list_head *);
+ void (*lm_setup)(struct file_lock *, void **);
+ bool (*lm_breaker_owns_lease)(struct file_lock *);
+ bool (*lm_lock_expirable)(struct file_lock *cfl);
+ void (*lm_expire_lock)(void);
+};
+
+struct lock_manager {
+ struct list_head list;
+ /*
+ * NFSv4 and up also want opens blocked during the grace period;
+ * NLM doesn't care:
+ */
+ bool block_opens;
+};
+
+struct net;
+void locks_start_grace(struct net *, struct lock_manager *);
+void locks_end_grace(struct lock_manager *);
+bool locks_in_grace(struct net *);
+bool opens_in_grace(struct net *);
+
+/*
+ * struct file_lock has a union that some filesystems use to track
+ * their own private info. The NFS side of things is defined here:
+ */
+#include <linux/nfs_fs_i.h>
+
+/*
+ * struct file_lock represents a generic "file lock". It's used to represent
+ * POSIX byte range locks, BSD (flock) locks, and leases. It's important to
+ * note that the same struct is used to represent both a request for a lock and
+ * the lock itself, but the same object is never used for both.
+ *
+ * FIXME: should we create a separate "struct lock_request" to help distinguish
+ * these two uses?
+ *
+ * The varous i_flctx lists are ordered by:
+ *
+ * 1) lock owner
+ * 2) lock range start
+ * 3) lock range end
+ *
+ * Obviously, the last two criteria only matter for POSIX locks.
+ */
+struct file_lock {
+ struct file_lock *fl_blocker; /* The lock, that is blocking us */
+ struct list_head fl_list; /* link into file_lock_context */
+ struct hlist_node fl_link; /* node in global lists */
+ struct list_head fl_blocked_requests; /* list of requests with
+ * ->fl_blocker pointing here
+ */
+ struct list_head fl_blocked_member; /* node in
+ * ->fl_blocker->fl_blocked_requests
+ */
+ fl_owner_t fl_owner;
+ unsigned int fl_flags;
+ unsigned char fl_type;
+ unsigned int fl_pid;
+ int fl_link_cpu; /* what cpu's list is this on? */
+ wait_queue_head_t fl_wait;
+ struct file *fl_file;
+ loff_t fl_start;
+ loff_t fl_end;
+
+ struct fasync_struct * fl_fasync; /* for lease break notifications */
+ /* for lease breaks: */
+ unsigned long fl_break_time;
+ unsigned long fl_downgrade_time;
+
+ const struct file_lock_operations *fl_ops; /* Callbacks for filesystems */
+ const struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */
+ union {
+ struct nfs_lock_info nfs_fl;
+ struct nfs4_lock_info nfs4_fl;
+ struct {
+ struct list_head link; /* link in AFS vnode's pending_locks list */
+ int state; /* state of grant or error if -ve */
+ unsigned int debug_id;
+ } afs;
+ struct {
+ struct inode *inode;
+ } ceph;
+ } fl_u;
+} __randomize_layout;
+
+struct file_lock_context {
+ spinlock_t flc_lock;
+ struct list_head flc_flock;
+ struct list_head flc_posix;
+ struct list_head flc_lease;
+};
+
+#ifdef CONFIG_FILE_LOCKING
+int fcntl_getlk(struct file *, unsigned int, struct flock *);
+int fcntl_setlk(unsigned int, struct file *, unsigned int,
+ struct flock *);
+
+#if BITS_PER_LONG == 32
+int fcntl_getlk64(struct file *, unsigned int, struct flock64 *);
+int fcntl_setlk64(unsigned int, struct file *, unsigned int,
+ struct flock64 *);
+#endif
+
+int fcntl_setlease(unsigned int fd, struct file *filp, long arg);
+int fcntl_getlease(struct file *filp);
+
+/* fs/locks.c */
+void locks_free_lock_context(struct inode *inode);
+void locks_free_lock(struct file_lock *fl);
+void locks_init_lock(struct file_lock *);
+struct file_lock * locks_alloc_lock(void);
+void locks_copy_lock(struct file_lock *, struct file_lock *);
+void locks_copy_conflock(struct file_lock *, struct file_lock *);
+void locks_remove_posix(struct file *, fl_owner_t);
+void locks_remove_file(struct file *);
+void locks_release_private(struct file_lock *);
+void posix_test_lock(struct file *, struct file_lock *);
+int posix_lock_file(struct file *, struct file_lock *, struct file_lock *);
+int locks_delete_block(struct file_lock *);
+int vfs_test_lock(struct file *, struct file_lock *);
+int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *);
+int vfs_cancel_lock(struct file *filp, struct file_lock *fl);
+bool vfs_inode_has_locks(struct inode *inode);
+int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl);
+int __break_lease(struct inode *inode, unsigned int flags, unsigned int type);
+void lease_get_mtime(struct inode *, struct timespec64 *time);
+int generic_setlease(struct file *, long, struct file_lock **, void **priv);
+int vfs_setlease(struct file *, long, struct file_lock **, void **);
+int lease_modify(struct file_lock *, int, struct list_head *);
+
+struct notifier_block;
+int lease_register_notifier(struct notifier_block *);
+void lease_unregister_notifier(struct notifier_block *);
+
+struct files_struct;
+void show_fd_locks(struct seq_file *f,
+ struct file *filp, struct files_struct *files);
+bool locks_owner_has_blockers(struct file_lock_context *flctx,
+ fl_owner_t owner);
+
+static inline struct file_lock_context *
+locks_inode_context(const struct inode *inode)
+{
+ return smp_load_acquire(&inode->i_flctx);
+}
+
+#else /* !CONFIG_FILE_LOCKING */
+static inline int fcntl_getlk(struct file *file, unsigned int cmd,
+ struct flock __user *user)
+{
+ return -EINVAL;
+}
+
+static inline int fcntl_setlk(unsigned int fd, struct file *file,
+ unsigned int cmd, struct flock __user *user)
+{
+ return -EACCES;
+}
+
+#if BITS_PER_LONG == 32
+static inline int fcntl_getlk64(struct file *file, unsigned int cmd,
+ struct flock64 *user)
+{
+ return -EINVAL;
+}
+
+static inline int fcntl_setlk64(unsigned int fd, struct file *file,
+ unsigned int cmd, struct flock64 *user)
+{
+ return -EACCES;
+}
+#endif
+static inline int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
+{
+ return -EINVAL;
+}
+
+static inline int fcntl_getlease(struct file *filp)
+{
+ return F_UNLCK;
+}
+
+static inline void
+locks_free_lock_context(struct inode *inode)
+{
+}
+
+static inline void locks_init_lock(struct file_lock *fl)
+{
+ return;
+}
+
+static inline void locks_copy_conflock(struct file_lock *new, struct file_lock *fl)
+{
+ return;
+}
+
+static inline void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
+{
+ return;
+}
+
+static inline void locks_remove_posix(struct file *filp, fl_owner_t owner)
+{
+ return;
+}
+
+static inline void locks_remove_file(struct file *filp)
+{
+ return;
+}
+
+static inline void posix_test_lock(struct file *filp, struct file_lock *fl)
+{
+ return;
+}
+
+static inline int posix_lock_file(struct file *filp, struct file_lock *fl,
+ struct file_lock *conflock)
+{
+ return -ENOLCK;
+}
+
+static inline int locks_delete_block(struct file_lock *waiter)
+{
+ return -ENOENT;
+}
+
+static inline int vfs_test_lock(struct file *filp, struct file_lock *fl)
+{
+ return 0;
+}
+
+static inline int vfs_lock_file(struct file *filp, unsigned int cmd,
+ struct file_lock *fl, struct file_lock *conf)
+{
+ return -ENOLCK;
+}
+
+static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
+{
+ return 0;
+}
+
+static inline bool vfs_inode_has_locks(struct inode *inode)
+{
+ return false;
+}
+
+static inline int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl)
+{
+ return -ENOLCK;
+}
+
+static inline int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
+{
+ return 0;
+}
+
+static inline void lease_get_mtime(struct inode *inode,
+ struct timespec64 *time)
+{
+ return;
+}
+
+static inline int generic_setlease(struct file *filp, long arg,
+ struct file_lock **flp, void **priv)
+{
+ return -EINVAL;
+}
+
+static inline int vfs_setlease(struct file *filp, long arg,
+ struct file_lock **lease, void **priv)
+{
+ return -EINVAL;
+}
+
+static inline int lease_modify(struct file_lock *fl, int arg,
+ struct list_head *dispose)
+{
+ return -EINVAL;
+}
+
+struct files_struct;
+static inline void show_fd_locks(struct seq_file *f,
+ struct file *filp, struct files_struct *files) {}
+static inline bool locks_owner_has_blockers(struct file_lock_context *flctx,
+ fl_owner_t owner)
+{
+ return false;
+}
+
+static inline struct file_lock_context *
+locks_inode_context(const struct inode *inode)
+{
+ return NULL;
+}
+
+#endif /* !CONFIG_FILE_LOCKING */
+
+static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl)
+{
+ return locks_lock_inode_wait(file_inode(filp), fl);
+}
+
+#ifdef CONFIG_FILE_LOCKING
+static inline int break_lease(struct inode *inode, unsigned int mode)
+{
+ /*
+ * Since this check is lockless, we must ensure that any refcounts
+ * taken are done before checking i_flctx->flc_lease. Otherwise, we
+ * could end up racing with tasks trying to set a new lease on this
+ * file.
+ */
+ smp_mb();
+ if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease))
+ return __break_lease(inode, mode, FL_LEASE);
+ return 0;
+}
+
+static inline int break_deleg(struct inode *inode, unsigned int mode)
+{
+ /*
+ * Since this check is lockless, we must ensure that any refcounts
+ * taken are done before checking i_flctx->flc_lease. Otherwise, we
+ * could end up racing with tasks trying to set a new lease on this
+ * file.
+ */
+ smp_mb();
+ if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease))
+ return __break_lease(inode, mode, FL_DELEG);
+ return 0;
+}
+
+static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode)
+{
+ int ret;
+
+ ret = break_deleg(inode, O_WRONLY|O_NONBLOCK);
+ if (ret == -EWOULDBLOCK && delegated_inode) {
+ *delegated_inode = inode;
+ ihold(inode);
+ }
+ return ret;
+}
+
+static inline int break_deleg_wait(struct inode **delegated_inode)
+{
+ int ret;
+
+ ret = break_deleg(*delegated_inode, O_WRONLY);
+ iput(*delegated_inode);
+ *delegated_inode = NULL;
+ return ret;
+}
+
+static inline int break_layout(struct inode *inode, bool wait)
+{
+ smp_mb();
+ if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease))
+ return __break_lease(inode,
+ wait ? O_WRONLY : O_WRONLY | O_NONBLOCK,
+ FL_LAYOUT);
+ return 0;
+}
+
+#else /* !CONFIG_FILE_LOCKING */
+static inline int break_lease(struct inode *inode, unsigned int mode)
+{
+ return 0;
+}
+
+static inline int break_deleg(struct inode *inode, unsigned int mode)
+{
+ return 0;
+}
+
+static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode)
+{
+ return 0;
+}
+
+static inline int break_deleg_wait(struct inode **delegated_inode)
+{
+ BUG();
+ return 0;
+}
+
+static inline int break_layout(struct inode *inode, bool wait)
+{
+ return 0;
+}
+
+#endif /* CONFIG_FILE_LOCKING */
+
+#endif /* _LINUX_FILELOCK_H */
diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h
index b986e267d149..b09f443d3ab9 100644
--- a/include/linux/firmware/xlnx-zynqmp.h
+++ b/include/linux/firmware/xlnx-zynqmp.h
@@ -545,8 +545,8 @@ int zynqmp_pm_request_wake(const u32 node,
const u64 address,
const enum zynqmp_pm_request_ack ack);
int zynqmp_pm_get_rpu_mode(u32 node_id, enum rpu_oper_mode *rpu_mode);
-int zynqmp_pm_set_rpu_mode(u32 node_id, u32 arg1);
-int zynqmp_pm_set_tcm_config(u32 node_id, u32 arg1);
+int zynqmp_pm_set_rpu_mode(u32 node_id, enum rpu_oper_mode rpu_mode);
+int zynqmp_pm_set_tcm_config(u32 node_id, enum rpu_tcm_comb tcm_mode);
int zynqmp_pm_set_sd_config(u32 node, enum pm_sd_config_type config, u32 value);
int zynqmp_pm_set_gem_config(u32 node, enum pm_gem_config_type config,
u32 value);
@@ -845,12 +845,12 @@ static inline int zynqmp_pm_get_rpu_mode(u32 node_id, enum rpu_oper_mode *rpu_mo
return -ENODEV;
}
-static inline int zynqmp_pm_set_rpu_mode(u32 node_id, u32 arg1)
+static inline int zynqmp_pm_set_rpu_mode(u32 node_id, enum rpu_oper_mode rpu_mode)
{
return -ENODEV;
}
-static inline int zynqmp_pm_set_tcm_config(u32 node_id, u32 arg1)
+static inline int zynqmp_pm_set_tcm_config(u32 node_id, enum rpu_tcm_comb tcm_mode)
{
return -ENODEV;
}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 066555ad1bf8..2acc46fb5f97 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1003,132 +1003,11 @@ static inline struct file *get_file(struct file *f)
#define MAX_LFS_FILESIZE ((loff_t)LLONG_MAX)
#endif
-#define FL_POSIX 1
-#define FL_FLOCK 2
-#define FL_DELEG 4 /* NFSv4 delegation */
-#define FL_ACCESS 8 /* not trying to lock, just looking */
-#define FL_EXISTS 16 /* when unlocking, test for existence */
-#define FL_LEASE 32 /* lease held on this file */
-#define FL_CLOSE 64 /* unlock on close */
-#define FL_SLEEP 128 /* A blocking lock */
-#define FL_DOWNGRADE_PENDING 256 /* Lease is being downgraded */
-#define FL_UNLOCK_PENDING 512 /* Lease is being broken */
-#define FL_OFDLCK 1024 /* lock is "owned" by struct file */
-#define FL_LAYOUT 2048 /* outstanding pNFS layout */
-#define FL_RECLAIM 4096 /* reclaiming from a reboot server */
-
-#define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE)
-
-/*
- * Special return value from posix_lock_file() and vfs_lock_file() for
- * asynchronous locking.
- */
-#define FILE_LOCK_DEFERRED 1
-
/* legacy typedef, should eventually be removed */
typedef void *fl_owner_t;
struct file_lock;
-struct file_lock_operations {
- void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
- void (*fl_release_private)(struct file_lock *);
-};
-
-struct lock_manager_operations {
- void *lm_mod_owner;
- fl_owner_t (*lm_get_owner)(fl_owner_t);
- void (*lm_put_owner)(fl_owner_t);
- void (*lm_notify)(struct file_lock *); /* unblock callback */
- int (*lm_grant)(struct file_lock *, int);
- bool (*lm_break)(struct file_lock *);
- int (*lm_change)(struct file_lock *, int, struct list_head *);
- void (*lm_setup)(struct file_lock *, void **);
- bool (*lm_breaker_owns_lease)(struct file_lock *);
- bool (*lm_lock_expirable)(struct file_lock *cfl);
- void (*lm_expire_lock)(void);
-};
-
-struct lock_manager {
- struct list_head list;
- /*
- * NFSv4 and up also want opens blocked during the grace period;
- * NLM doesn't care:
- */
- bool block_opens;
-};
-
-struct net;
-void locks_start_grace(struct net *, struct lock_manager *);
-void locks_end_grace(struct lock_manager *);
-bool locks_in_grace(struct net *);
-bool opens_in_grace(struct net *);
-
-/* that will die - we need it for nfs_lock_info */
-#include <linux/nfs_fs_i.h>
-
-/*
- * struct file_lock represents a generic "file lock". It's used to represent
- * POSIX byte range locks, BSD (flock) locks, and leases. It's important to
- * note that the same struct is used to represent both a request for a lock and
- * the lock itself, but the same object is never used for both.
- *
- * FIXME: should we create a separate "struct lock_request" to help distinguish
- * these two uses?
- *
- * The varous i_flctx lists are ordered by:
- *
- * 1) lock owner
- * 2) lock range start
- * 3) lock range end
- *
- * Obviously, the last two criteria only matter for POSIX locks.
- */
-struct file_lock {
- struct file_lock *fl_blocker; /* The lock, that is blocking us */
- struct list_head fl_list; /* link into file_lock_context */
- struct hlist_node fl_link; /* node in global lists */
- struct list_head fl_blocked_requests; /* list of requests with
- * ->fl_blocker pointing here
- */
- struct list_head fl_blocked_member; /* node in
- * ->fl_blocker->fl_blocked_requests
- */
- fl_owner_t fl_owner;
- unsigned int fl_flags;
- unsigned char fl_type;
- unsigned int fl_pid;
- int fl_link_cpu; /* what cpu's list is this on? */
- wait_queue_head_t fl_wait;
- struct file *fl_file;
- loff_t fl_start;
- loff_t fl_end;
-
- struct fasync_struct * fl_fasync; /* for lease break notifications */
- /* for lease breaks: */
- unsigned long fl_break_time;
- unsigned long fl_downgrade_time;
-
- const struct file_lock_operations *fl_ops; /* Callbacks for filesystems */
- const struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */
- union {
- struct nfs_lock_info nfs_fl;
- struct nfs4_lock_info nfs4_fl;
- struct {
- struct list_head link; /* link in AFS vnode's pending_locks list */
- int state; /* state of grant or error if -ve */
- unsigned int debug_id;
- } afs;
- } fl_u;
-} __randomize_layout;
-
-struct file_lock_context {
- spinlock_t flc_lock;
- struct list_head flc_flock;
- struct list_head flc_posix;
- struct list_head flc_lease;
-};
-
/* The following constant reflects the upper bound of the file/locking space */
#ifndef OFFSET_MAX
#define OFFSET_MAX type_max(loff_t)
@@ -1137,216 +1016,6 @@ struct file_lock_context {
extern void send_sigio(struct fown_struct *fown, int fd, int band);
-#define locks_inode(f) file_inode(f)
-
-#ifdef CONFIG_FILE_LOCKING
-extern int fcntl_getlk(struct file *, unsigned int, struct flock *);
-extern int fcntl_setlk(unsigned int, struct file *, unsigned int,
- struct flock *);
-
-#if BITS_PER_LONG == 32
-extern int fcntl_getlk64(struct file *, unsigned int, struct flock64 *);
-extern int fcntl_setlk64(unsigned int, struct file *, unsigned int,
- struct flock64 *);
-#endif
-
-extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg);
-extern int fcntl_getlease(struct file *filp);
-
-/* fs/locks.c */
-void locks_free_lock_context(struct inode *inode);
-void locks_free_lock(struct file_lock *fl);
-extern void locks_init_lock(struct file_lock *);
-extern struct file_lock * locks_alloc_lock(void);
-extern void locks_copy_lock(struct file_lock *, struct file_lock *);
-extern void locks_copy_conflock(struct file_lock *, struct file_lock *);
-extern void locks_remove_posix(struct file *, fl_owner_t);
-extern void locks_remove_file(struct file *);
-extern void locks_release_private(struct file_lock *);
-extern void posix_test_lock(struct file *, struct file_lock *);
-extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *);
-extern int locks_delete_block(struct file_lock *);
-extern int vfs_test_lock(struct file *, struct file_lock *);
-extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *);
-extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl);
-bool vfs_inode_has_locks(struct inode *inode);
-extern int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl);
-extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type);
-extern void lease_get_mtime(struct inode *, struct timespec64 *time);
-extern int generic_setlease(struct file *, long, struct file_lock **, void **priv);
-extern int vfs_setlease(struct file *, long, struct file_lock **, void **);
-extern int lease_modify(struct file_lock *, int, struct list_head *);
-
-struct notifier_block;
-extern int lease_register_notifier(struct notifier_block *);
-extern void lease_unregister_notifier(struct notifier_block *);
-
-struct files_struct;
-extern void show_fd_locks(struct seq_file *f,
- struct file *filp, struct files_struct *files);
-extern bool locks_owner_has_blockers(struct file_lock_context *flctx,
- fl_owner_t owner);
-
-static inline struct file_lock_context *
-locks_inode_context(const struct inode *inode)
-{
- return smp_load_acquire(&inode->i_flctx);
-}
-
-#else /* !CONFIG_FILE_LOCKING */
-static inline int fcntl_getlk(struct file *file, unsigned int cmd,
- struct flock __user *user)
-{
- return -EINVAL;
-}
-
-static inline int fcntl_setlk(unsigned int fd, struct file *file,
- unsigned int cmd, struct flock __user *user)
-{
- return -EACCES;
-}
-
-#if BITS_PER_LONG == 32
-static inline int fcntl_getlk64(struct file *file, unsigned int cmd,
- struct flock64 *user)
-{
- return -EINVAL;
-}
-
-static inline int fcntl_setlk64(unsigned int fd, struct file *file,
- unsigned int cmd, struct flock64 *user)
-{
- return -EACCES;
-}
-#endif
-static inline int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
-{
- return -EINVAL;
-}
-
-static inline int fcntl_getlease(struct file *filp)
-{
- return F_UNLCK;
-}
-
-static inline void
-locks_free_lock_context(struct inode *inode)
-{
-}
-
-static inline void locks_init_lock(struct file_lock *fl)
-{
- return;
-}
-
-static inline void locks_copy_conflock(struct file_lock *new, struct file_lock *fl)
-{
- return;
-}
-
-static inline void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
-{
- return;
-}
-
-static inline void locks_remove_posix(struct file *filp, fl_owner_t owner)
-{
- return;
-}
-
-static inline void locks_remove_file(struct file *filp)
-{
- return;
-}
-
-static inline void posix_test_lock(struct file *filp, struct file_lock *fl)
-{
- return;
-}
-
-static inline int posix_lock_file(struct file *filp, struct file_lock *fl,
- struct file_lock *conflock)
-{
- return -ENOLCK;
-}
-
-static inline int locks_delete_block(struct file_lock *waiter)
-{
- return -ENOENT;
-}
-
-static inline int vfs_test_lock(struct file *filp, struct file_lock *fl)
-{
- return 0;
-}
-
-static inline int vfs_lock_file(struct file *filp, unsigned int cmd,
- struct file_lock *fl, struct file_lock *conf)
-{
- return -ENOLCK;
-}
-
-static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
-{
- return 0;
-}
-
-static inline bool vfs_inode_has_locks(struct inode *inode)
-{
- return false;
-}
-
-static inline int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl)
-{
- return -ENOLCK;
-}
-
-static inline int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
-{
- return 0;
-}
-
-static inline void lease_get_mtime(struct inode *inode,
- struct timespec64 *time)
-{
- return;
-}
-
-static inline int generic_setlease(struct file *filp, long arg,
- struct file_lock **flp, void **priv)
-{
- return -EINVAL;
-}
-
-static inline int vfs_setlease(struct file *filp, long arg,
- struct file_lock **lease, void **priv)
-{
- return -EINVAL;
-}
-
-static inline int lease_modify(struct file_lock *fl, int arg,
- struct list_head *dispose)
-{
- return -EINVAL;
-}
-
-struct files_struct;
-static inline void show_fd_locks(struct seq_file *f,
- struct file *filp, struct files_struct *files) {}
-static inline bool locks_owner_has_blockers(struct file_lock_context *flctx,
- fl_owner_t owner)
-{
- return false;
-}
-
-static inline struct file_lock_context *
-locks_inode_context(const struct inode *inode)
-{
- return NULL;
-}
-
-#endif /* !CONFIG_FILE_LOCKING */
-
static inline struct inode *file_inode(const struct file *f)
{
return f->f_inode;
@@ -1357,11 +1026,6 @@ static inline struct dentry *file_dentry(const struct file *file)
return d_real(file->f_path.dentry, file_inode(file));
}
-static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl)
-{
- return locks_lock_inode_wait(locks_inode(filp), fl);
-}
-
struct fasync_struct {
rwlock_t fa_lock;
int magic;
@@ -1632,22 +1296,22 @@ static inline void i_gid_write(struct inode *inode, gid_t gid)
}
/**
- * i_uid_into_vfsuid - map an inode's i_uid down into a mnt_userns
- * @mnt_userns: user namespace of the mount the inode was found from
+ * i_uid_into_vfsuid - map an inode's i_uid down according to an idmapping
+ * @idmap: idmap of the mount the inode was found from
* @inode: inode to map
*
- * Return: whe inode's i_uid mapped down according to @mnt_userns.
+ * Return: whe inode's i_uid mapped down according to @idmap.
* If the inode's i_uid has no mapping INVALID_VFSUID is returned.
*/
-static inline vfsuid_t i_uid_into_vfsuid(struct user_namespace *mnt_userns,
+static inline vfsuid_t i_uid_into_vfsuid(struct mnt_idmap *idmap,
const struct inode *inode)
{
- return make_vfsuid(mnt_userns, i_user_ns(inode), inode->i_uid);
+ return make_vfsuid(idmap, i_user_ns(inode), inode->i_uid);
}
/**
* i_uid_needs_update - check whether inode's i_uid needs to be updated
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @attr: the new attributes of @inode
* @inode: the inode to update
*
@@ -1656,50 +1320,50 @@ static inline vfsuid_t i_uid_into_vfsuid(struct user_namespace *mnt_userns,
*
* Return: true if @inode's i_uid field needs to be updated, false if not.
*/
-static inline bool i_uid_needs_update(struct user_namespace *mnt_userns,
+static inline bool i_uid_needs_update(struct mnt_idmap *idmap,
const struct iattr *attr,
const struct inode *inode)
{
return ((attr->ia_valid & ATTR_UID) &&
!vfsuid_eq(attr->ia_vfsuid,
- i_uid_into_vfsuid(mnt_userns, inode)));
+ i_uid_into_vfsuid(idmap, inode)));
}
/**
* i_uid_update - update @inode's i_uid field
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @attr: the new attributes of @inode
* @inode: the inode to update
*
* Safely update @inode's i_uid field translating the vfsuid of any idmapped
* mount into the filesystem kuid.
*/
-static inline void i_uid_update(struct user_namespace *mnt_userns,
+static inline void i_uid_update(struct mnt_idmap *idmap,
const struct iattr *attr,
struct inode *inode)
{
if (attr->ia_valid & ATTR_UID)
- inode->i_uid = from_vfsuid(mnt_userns, i_user_ns(inode),
+ inode->i_uid = from_vfsuid(idmap, i_user_ns(inode),
attr->ia_vfsuid);
}
/**
- * i_gid_into_vfsgid - map an inode's i_gid down into a mnt_userns
- * @mnt_userns: user namespace of the mount the inode was found from
+ * i_gid_into_vfsgid - map an inode's i_gid down according to an idmapping
+ * @idmap: idmap of the mount the inode was found from
* @inode: inode to map
*
- * Return: the inode's i_gid mapped down according to @mnt_userns.
+ * Return: the inode's i_gid mapped down according to @idmap.
* If the inode's i_gid has no mapping INVALID_VFSGID is returned.
*/
-static inline vfsgid_t i_gid_into_vfsgid(struct user_namespace *mnt_userns,
+static inline vfsgid_t i_gid_into_vfsgid(struct mnt_idmap *idmap,
const struct inode *inode)
{
- return make_vfsgid(mnt_userns, i_user_ns(inode), inode->i_gid);
+ return make_vfsgid(idmap, i_user_ns(inode), inode->i_gid);
}
/**
* i_gid_needs_update - check whether inode's i_gid needs to be updated
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @attr: the new attributes of @inode
* @inode: the inode to update
*
@@ -1708,83 +1372,83 @@ static inline vfsgid_t i_gid_into_vfsgid(struct user_namespace *mnt_userns,
*
* Return: true if @inode's i_gid field needs to be updated, false if not.
*/
-static inline bool i_gid_needs_update(struct user_namespace *mnt_userns,
+static inline bool i_gid_needs_update(struct mnt_idmap *idmap,
const struct iattr *attr,
const struct inode *inode)
{
return ((attr->ia_valid & ATTR_GID) &&
!vfsgid_eq(attr->ia_vfsgid,
- i_gid_into_vfsgid(mnt_userns, inode)));
+ i_gid_into_vfsgid(idmap, inode)));
}
/**
* i_gid_update - update @inode's i_gid field
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @attr: the new attributes of @inode
* @inode: the inode to update
*
* Safely update @inode's i_gid field translating the vfsgid of any idmapped
* mount into the filesystem kgid.
*/
-static inline void i_gid_update(struct user_namespace *mnt_userns,
+static inline void i_gid_update(struct mnt_idmap *idmap,
const struct iattr *attr,
struct inode *inode)
{
if (attr->ia_valid & ATTR_GID)
- inode->i_gid = from_vfsgid(mnt_userns, i_user_ns(inode),
+ inode->i_gid = from_vfsgid(idmap, i_user_ns(inode),
attr->ia_vfsgid);
}
/**
* inode_fsuid_set - initialize inode's i_uid field with callers fsuid
* @inode: inode to initialize
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
*
* Initialize the i_uid field of @inode. If the inode was found/created via
- * an idmapped mount map the caller's fsuid according to @mnt_users.
+ * an idmapped mount map the caller's fsuid according to @idmap.
*/
static inline void inode_fsuid_set(struct inode *inode,
- struct user_namespace *mnt_userns)
+ struct mnt_idmap *idmap)
{
- inode->i_uid = mapped_fsuid(mnt_userns, i_user_ns(inode));
+ inode->i_uid = mapped_fsuid(idmap, i_user_ns(inode));
}
/**
* inode_fsgid_set - initialize inode's i_gid field with callers fsgid
* @inode: inode to initialize
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
*
* Initialize the i_gid field of @inode. If the inode was found/created via
- * an idmapped mount map the caller's fsgid according to @mnt_users.
+ * an idmapped mount map the caller's fsgid according to @idmap.
*/
static inline void inode_fsgid_set(struct inode *inode,
- struct user_namespace *mnt_userns)
+ struct mnt_idmap *idmap)
{
- inode->i_gid = mapped_fsgid(mnt_userns, i_user_ns(inode));
+ inode->i_gid = mapped_fsgid(idmap, i_user_ns(inode));
}
/**
* fsuidgid_has_mapping() - check whether caller's fsuid/fsgid is mapped
* @sb: the superblock we want a mapping in
- * @mnt_userns: user namespace of the relevant mount
+ * @idmap: idmap of the relevant mount
*
* Check whether the caller's fsuid and fsgid have a valid mapping in the
* s_user_ns of the superblock @sb. If the caller is on an idmapped mount map
- * the caller's fsuid and fsgid according to the @mnt_userns first.
+ * the caller's fsuid and fsgid according to the @idmap first.
*
* Return: true if fsuid and fsgid is mapped, false if not.
*/
static inline bool fsuidgid_has_mapping(struct super_block *sb,
- struct user_namespace *mnt_userns)
+ struct mnt_idmap *idmap)
{
struct user_namespace *fs_userns = sb->s_user_ns;
kuid_t kuid;
kgid_t kgid;
- kuid = mapped_fsuid(mnt_userns, fs_userns);
+ kuid = mapped_fsuid(idmap, fs_userns);
if (!uid_valid(kuid))
return false;
- kgid = mapped_fsgid(mnt_userns, fs_userns);
+ kgid = mapped_fsgid(idmap, fs_userns);
if (!gid_valid(kgid))
return false;
return kuid_has_mapping(fs_userns, kuid) &&
@@ -1938,42 +1602,42 @@ static inline bool sb_start_intwrite_trylock(struct super_block *sb)
return __sb_start_write_trylock(sb, SB_FREEZE_FS);
}
-bool inode_owner_or_capable(struct user_namespace *mnt_userns,
+bool inode_owner_or_capable(struct mnt_idmap *idmap,
const struct inode *inode);
/*
* VFS helper functions..
*/
-int vfs_create(struct user_namespace *, struct inode *,
+int vfs_create(struct mnt_idmap *, struct inode *,
struct dentry *, umode_t, bool);
-int vfs_mkdir(struct user_namespace *, struct inode *,
+int vfs_mkdir(struct mnt_idmap *, struct inode *,
struct dentry *, umode_t);
-int vfs_mknod(struct user_namespace *, struct inode *, struct dentry *,
+int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
umode_t, dev_t);
-int vfs_symlink(struct user_namespace *, struct inode *,
+int vfs_symlink(struct mnt_idmap *, struct inode *,
struct dentry *, const char *);
-int vfs_link(struct dentry *, struct user_namespace *, struct inode *,
+int vfs_link(struct dentry *, struct mnt_idmap *, struct inode *,
struct dentry *, struct inode **);
-int vfs_rmdir(struct user_namespace *, struct inode *, struct dentry *);
-int vfs_unlink(struct user_namespace *, struct inode *, struct dentry *,
+int vfs_rmdir(struct mnt_idmap *, struct inode *, struct dentry *);
+int vfs_unlink(struct mnt_idmap *, struct inode *, struct dentry *,
struct inode **);
/**
* struct renamedata - contains all information required for renaming
- * @old_mnt_userns: old user namespace of the mount the inode was found from
+ * @old_mnt_idmap: idmap of the old mount the inode was found from
* @old_dir: parent of source
* @old_dentry: source
- * @new_mnt_userns: new user namespace of the mount the inode was found from
+ * @new_mnt_idmap: idmap of the new mount the inode was found from
* @new_dir: parent of destination
* @new_dentry: destination
* @delegated_inode: returns an inode needing a delegation break
* @flags: rename flags
*/
struct renamedata {
- struct user_namespace *old_mnt_userns;
+ struct mnt_idmap *old_mnt_idmap;
struct inode *old_dir;
struct dentry *old_dentry;
- struct user_namespace *new_mnt_userns;
+ struct mnt_idmap *new_mnt_idmap;
struct inode *new_dir;
struct dentry *new_dentry;
struct inode **delegated_inode;
@@ -1982,14 +1646,14 @@ struct renamedata {
int vfs_rename(struct renamedata *);
-static inline int vfs_whiteout(struct user_namespace *mnt_userns,
+static inline int vfs_whiteout(struct mnt_idmap *idmap,
struct inode *dir, struct dentry *dentry)
{
- return vfs_mknod(mnt_userns, dir, dentry, S_IFCHR | WHITEOUT_MODE,
+ return vfs_mknod(idmap, dir, dentry, S_IFCHR | WHITEOUT_MODE,
WHITEOUT_DEV);
}
-struct file *vfs_tmpfile_open(struct user_namespace *mnt_userns,
+struct file *vfs_tmpfile_open(struct mnt_idmap *idmap,
const struct path *parentpath,
umode_t mode, int open_flag, const struct cred *cred);
@@ -2013,10 +1677,10 @@ extern long compat_ptr_ioctl(struct file *file, unsigned int cmd,
/*
* VFS file helper functions.
*/
-void inode_init_owner(struct user_namespace *mnt_userns, struct inode *inode,
+void inode_init_owner(struct mnt_idmap *idmap, struct inode *inode,
const struct inode *dir, umode_t mode);
extern bool may_open_dev(const struct path *path);
-umode_t mode_strip_sgid(struct user_namespace *mnt_userns,
+umode_t mode_strip_sgid(struct mnt_idmap *idmap,
const struct inode *dir, umode_t mode);
/*
@@ -2134,27 +1798,26 @@ struct file_operations {
struct inode_operations {
struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *);
- int (*permission) (struct user_namespace *, struct inode *, int);
+ int (*permission) (struct mnt_idmap *, struct inode *, int);
struct posix_acl * (*get_inode_acl)(struct inode *, int, bool);
int (*readlink) (struct dentry *, char __user *,int);
- int (*create) (struct user_namespace *, struct inode *,struct dentry *,
+ int (*create) (struct mnt_idmap *, struct inode *,struct dentry *,
umode_t, bool);
int (*link) (struct dentry *,struct inode *,struct dentry *);
int (*unlink) (struct inode *,struct dentry *);
- int (*symlink) (struct user_namespace *, struct inode *,struct dentry *,
+ int (*symlink) (struct mnt_idmap *, struct inode *,struct dentry *,
const char *);
- int (*mkdir) (struct user_namespace *, struct inode *,struct dentry *,
+ int (*mkdir) (struct mnt_idmap *, struct inode *,struct dentry *,
umode_t);
int (*rmdir) (struct inode *,struct dentry *);
- int (*mknod) (struct user_namespace *, struct inode *,struct dentry *,
+ int (*mknod) (struct mnt_idmap *, struct inode *,struct dentry *,
umode_t,dev_t);
- int (*rename) (struct user_namespace *, struct inode *, struct dentry *,
+ int (*rename) (struct mnt_idmap *, struct inode *, struct dentry *,
struct inode *, struct dentry *, unsigned int);
- int (*setattr) (struct user_namespace *, struct dentry *,
- struct iattr *);
- int (*getattr) (struct user_namespace *, const struct path *,
+ int (*setattr) (struct mnt_idmap *, struct dentry *, struct iattr *);
+ int (*getattr) (struct mnt_idmap *, const struct path *,
struct kstat *, u32, unsigned int);
ssize_t (*listxattr) (struct dentry *, char *, size_t);
int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
@@ -2163,13 +1826,13 @@ struct inode_operations {
int (*atomic_open)(struct inode *, struct dentry *,
struct file *, unsigned open_flag,
umode_t create_mode);
- int (*tmpfile) (struct user_namespace *, struct inode *,
+ int (*tmpfile) (struct mnt_idmap *, struct inode *,
struct file *, umode_t);
- struct posix_acl *(*get_acl)(struct user_namespace *, struct dentry *,
+ struct posix_acl *(*get_acl)(struct mnt_idmap *, struct dentry *,
int);
- int (*set_acl)(struct user_namespace *, struct dentry *,
+ int (*set_acl)(struct mnt_idmap *, struct dentry *,
struct posix_acl *, int);
- int (*fileattr_set)(struct user_namespace *mnt_userns,
+ int (*fileattr_set)(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa);
int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
} ____cacheline_aligned;
@@ -2323,11 +1986,11 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags
#define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \
(inode)->i_rdev == WHITEOUT_DEV)
-static inline bool HAS_UNMAPPED_ID(struct user_namespace *mnt_userns,
+static inline bool HAS_UNMAPPED_ID(struct mnt_idmap *idmap,
struct inode *inode)
{
- return !vfsuid_valid(i_uid_into_vfsuid(mnt_userns, inode)) ||
- !vfsgid_valid(i_gid_into_vfsgid(mnt_userns, inode));
+ return !vfsuid_valid(i_uid_into_vfsuid(idmap, inode)) ||
+ !vfsgid_valid(i_gid_into_vfsgid(idmap, inode));
}
static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
@@ -2621,96 +2284,6 @@ extern struct kobject *fs_kobj;
#define MAX_RW_COUNT (INT_MAX & PAGE_MASK)
-#ifdef CONFIG_FILE_LOCKING
-static inline int break_lease(struct inode *inode, unsigned int mode)
-{
- /*
- * Since this check is lockless, we must ensure that any refcounts
- * taken are done before checking i_flctx->flc_lease. Otherwise, we
- * could end up racing with tasks trying to set a new lease on this
- * file.
- */
- smp_mb();
- if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease))
- return __break_lease(inode, mode, FL_LEASE);
- return 0;
-}
-
-static inline int break_deleg(struct inode *inode, unsigned int mode)
-{
- /*
- * Since this check is lockless, we must ensure that any refcounts
- * taken are done before checking i_flctx->flc_lease. Otherwise, we
- * could end up racing with tasks trying to set a new lease on this
- * file.
- */
- smp_mb();
- if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease))
- return __break_lease(inode, mode, FL_DELEG);
- return 0;
-}
-
-static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode)
-{
- int ret;
-
- ret = break_deleg(inode, O_WRONLY|O_NONBLOCK);
- if (ret == -EWOULDBLOCK && delegated_inode) {
- *delegated_inode = inode;
- ihold(inode);
- }
- return ret;
-}
-
-static inline int break_deleg_wait(struct inode **delegated_inode)
-{
- int ret;
-
- ret = break_deleg(*delegated_inode, O_WRONLY);
- iput(*delegated_inode);
- *delegated_inode = NULL;
- return ret;
-}
-
-static inline int break_layout(struct inode *inode, bool wait)
-{
- smp_mb();
- if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease))
- return __break_lease(inode,
- wait ? O_WRONLY : O_WRONLY | O_NONBLOCK,
- FL_LAYOUT);
- return 0;
-}
-
-#else /* !CONFIG_FILE_LOCKING */
-static inline int break_lease(struct inode *inode, unsigned int mode)
-{
- return 0;
-}
-
-static inline int break_deleg(struct inode *inode, unsigned int mode)
-{
- return 0;
-}
-
-static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode)
-{
- return 0;
-}
-
-static inline int break_deleg_wait(struct inode **delegated_inode)
-{
- BUG();
- return 0;
-}
-
-static inline int break_layout(struct inode *inode, bool wait)
-{
- return 0;
-}
-
-#endif /* CONFIG_FILE_LOCKING */
-
/* fs/open.c */
struct audit_names;
struct filename {
@@ -2722,11 +2295,6 @@ struct filename {
};
static_assert(offsetof(struct filename, iname) % sizeof(long) == 0);
-static inline struct user_namespace *file_mnt_user_ns(struct file *file)
-{
- return mnt_user_ns(file->f_path.mnt);
-}
-
static inline struct mnt_idmap *file_mnt_idmap(struct file *file)
{
return mnt_idmap(file->f_path.mnt);
@@ -2746,7 +2314,7 @@ static inline bool is_idmapped_mnt(const struct vfsmount *mnt)
}
extern long vfs_truncate(const struct path *, loff_t);
-int do_truncate(struct user_namespace *, struct dentry *, loff_t start,
+int do_truncate(struct mnt_idmap *, struct dentry *, loff_t start,
unsigned int time_attrs, struct file *filp);
extern int vfs_fallocate(struct file *file, int mode, loff_t offset,
loff_t len);
@@ -2901,21 +2469,21 @@ static inline int bmap(struct inode *inode, sector_t *block)
}
#endif
-int notify_change(struct user_namespace *, struct dentry *,
+int notify_change(struct mnt_idmap *, struct dentry *,
struct iattr *, struct inode **);
-int inode_permission(struct user_namespace *, struct inode *, int);
-int generic_permission(struct user_namespace *, struct inode *, int);
+int inode_permission(struct mnt_idmap *, struct inode *, int);
+int generic_permission(struct mnt_idmap *, struct inode *, int);
static inline int file_permission(struct file *file, int mask)
{
- return inode_permission(file_mnt_user_ns(file),
+ return inode_permission(file_mnt_idmap(file),
file_inode(file), mask);
}
static inline int path_permission(const struct path *path, int mask)
{
- return inode_permission(mnt_user_ns(path->mnt),
+ return inode_permission(mnt_idmap(path->mnt),
d_inode(path->dentry), mask);
}
-int __check_sticky(struct user_namespace *mnt_userns, struct inode *dir,
+int __check_sticky(struct mnt_idmap *idmap, struct inode *dir,
struct inode *inode);
static inline bool execute_ok(struct inode *inode)
@@ -3103,7 +2671,7 @@ extern void __destroy_inode(struct inode *);
extern struct inode *new_inode_pseudo(struct super_block *sb);
extern struct inode *new_inode(struct super_block *sb);
extern void free_inode_nonrcu(struct inode *inode);
-extern int setattr_should_drop_suidgid(struct user_namespace *, struct inode *);
+extern int setattr_should_drop_suidgid(struct mnt_idmap *, struct inode *);
extern int file_remove_privs(struct file *);
/*
@@ -3262,7 +2830,7 @@ extern void page_put_link(void *);
extern int page_symlink(struct inode *inode, const char *symname, int len);
extern const struct inode_operations page_symlink_inode_operations;
extern void kfree_link(void *);
-void generic_fillattr(struct user_namespace *, struct inode *, struct kstat *);
+void generic_fillattr(struct mnt_idmap *, struct inode *, struct kstat *);
void generic_fill_statx_attr(struct inode *inode, struct kstat *stat);
extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int);
extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int);
@@ -3313,9 +2881,9 @@ extern int dcache_dir_open(struct inode *, struct file *);
extern int dcache_dir_close(struct inode *, struct file *);
extern loff_t dcache_dir_lseek(struct file *, loff_t, int);
extern int dcache_readdir(struct file *, struct dir_context *);
-extern int simple_setattr(struct user_namespace *, struct dentry *,
+extern int simple_setattr(struct mnt_idmap *, struct dentry *,
struct iattr *);
-extern int simple_getattr(struct user_namespace *, const struct path *,
+extern int simple_getattr(struct mnt_idmap *, const struct path *,
struct kstat *, u32, unsigned int);
extern int simple_statfs(struct dentry *, struct kstatfs *);
extern int simple_open(struct inode *inode, struct file *file);
@@ -3324,7 +2892,7 @@ extern int simple_unlink(struct inode *, struct dentry *);
extern int simple_rmdir(struct inode *, struct dentry *);
extern int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry);
-extern int simple_rename(struct user_namespace *, struct inode *,
+extern int simple_rename(struct mnt_idmap *, struct inode *,
struct dentry *, struct inode *, struct dentry *,
unsigned int);
extern void simple_recursive_removal(struct dentry *,
@@ -3366,11 +2934,11 @@ extern int generic_check_addressable(unsigned, u64);
extern void generic_set_encrypted_ci_d_ops(struct dentry *dentry);
-int may_setattr(struct user_namespace *mnt_userns, struct inode *inode,
+int may_setattr(struct mnt_idmap *idmap, struct inode *inode,
unsigned int ia_valid);
-int setattr_prepare(struct user_namespace *, struct dentry *, struct iattr *);
+int setattr_prepare(struct mnt_idmap *, struct dentry *, struct iattr *);
extern int inode_newsize_ok(const struct inode *, loff_t offset);
-void setattr_copy(struct user_namespace *, struct inode *inode,
+void setattr_copy(struct mnt_idmap *, struct inode *inode,
const struct iattr *attr);
extern int file_update_time(struct file *file);
@@ -3537,13 +3105,13 @@ static inline bool is_sxid(umode_t mode)
return mode & (S_ISUID | S_ISGID);
}
-static inline int check_sticky(struct user_namespace *mnt_userns,
+static inline int check_sticky(struct mnt_idmap *idmap,
struct inode *dir, struct inode *inode)
{
if (!(dir->i_mode & S_ISVTX))
return 0;
- return __check_sticky(mnt_userns, dir, inode);
+ return __check_sticky(idmap, dir, inode);
}
static inline void inode_has_no_xattr(struct inode *inode)
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index 4f5f8a651213..e0a49c3125eb 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -257,8 +257,8 @@ int fscrypt_encrypt_block_inplace(const struct inode *inode, struct page *page,
unsigned int len, unsigned int offs,
u64 lblk_num, gfp_t gfp_flags);
-int fscrypt_decrypt_pagecache_blocks(struct page *page, unsigned int len,
- unsigned int offs);
+int fscrypt_decrypt_pagecache_blocks(struct folio *folio, size_t len,
+ size_t offs);
int fscrypt_decrypt_block_inplace(const struct inode *inode, struct page *page,
unsigned int len, unsigned int offs,
u64 lblk_num);
@@ -309,8 +309,6 @@ fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy)
/* keyring.c */
void fscrypt_destroy_keyring(struct super_block *sb);
int fscrypt_ioctl_add_key(struct file *filp, void __user *arg);
-int fscrypt_add_test_dummy_key(struct super_block *sb,
- const struct fscrypt_dummy_policy *dummy_policy);
int fscrypt_ioctl_remove_key(struct file *filp, void __user *arg);
int fscrypt_ioctl_remove_key_all_users(struct file *filp, void __user *arg);
int fscrypt_ioctl_get_key_status(struct file *filp, void __user *arg);
@@ -422,9 +420,8 @@ static inline int fscrypt_encrypt_block_inplace(const struct inode *inode,
return -EOPNOTSUPP;
}
-static inline int fscrypt_decrypt_pagecache_blocks(struct page *page,
- unsigned int len,
- unsigned int offs)
+static inline int fscrypt_decrypt_pagecache_blocks(struct folio *folio,
+ size_t len, size_t offs)
{
return -EOPNOTSUPP;
}
@@ -530,13 +527,6 @@ static inline int fscrypt_ioctl_add_key(struct file *filp, void __user *arg)
return -EOPNOTSUPP;
}
-static inline int
-fscrypt_add_test_dummy_key(struct super_block *sb,
- const struct fscrypt_dummy_policy *dummy_policy)
-{
- return 0;
-}
-
static inline int fscrypt_ioctl_remove_key(struct file *filp, void __user *arg)
{
return -EOPNOTSUPP;
diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h
index 40f14e5fed9d..119a3266791f 100644
--- a/include/linux/fsverity.h
+++ b/include/linux/fsverity.h
@@ -12,6 +12,7 @@
#define _LINUX_FSVERITY_H
#include <linux/fs.h>
+#include <linux/mm.h>
#include <crypto/hash_info.h>
#include <crypto/sha2.h>
#include <uapi/linux/fsverity.h>
@@ -93,8 +94,7 @@ struct fsverity_operations {
* isn't already cached. Implementations may ignore this
* argument; it's only a performance optimization.
*
- * This can be called at any time on an open verity file, as well as
- * between ->begin_enable_verity() and ->end_enable_verity(). It may be
+ * This can be called at any time on an open verity file. It may be
* called by multiple processes concurrently, even with the same page.
*
* Note that this must retrieve a *page*, not necessarily a *block*.
@@ -109,9 +109,9 @@ struct fsverity_operations {
* Write a Merkle tree block to the given inode.
*
* @inode: the inode for which the Merkle tree is being built
- * @buf: block to write
- * @index: 0-based index of the block within the Merkle tree
- * @log_blocksize: log base 2 of the Merkle tree block size
+ * @buf: the Merkle tree block to write
+ * @pos: the position of the block in the Merkle tree (in bytes)
+ * @size: the Merkle tree block size (in bytes)
*
* This is only called between ->begin_enable_verity() and
* ->end_enable_verity().
@@ -119,7 +119,7 @@ struct fsverity_operations {
* Return: 0 on success, -errno on failure
*/
int (*write_merkle_tree_block)(struct inode *inode, const void *buf,
- u64 index, int log_blocksize);
+ u64 pos, unsigned int size);
};
#ifdef CONFIG_FS_VERITY
@@ -148,9 +148,21 @@ int fsverity_get_digest(struct inode *inode,
/* open.c */
-int fsverity_file_open(struct inode *inode, struct file *filp);
-int fsverity_prepare_setattr(struct dentry *dentry, struct iattr *attr);
-void fsverity_cleanup_inode(struct inode *inode);
+int __fsverity_file_open(struct inode *inode, struct file *filp);
+int __fsverity_prepare_setattr(struct dentry *dentry, struct iattr *attr);
+void __fsverity_cleanup_inode(struct inode *inode);
+
+/**
+ * fsverity_cleanup_inode() - free the inode's verity info, if present
+ * @inode: an inode being evicted
+ *
+ * Filesystems must call this on inode eviction to free ->i_verity_info.
+ */
+static inline void fsverity_cleanup_inode(struct inode *inode)
+{
+ if (inode->i_verity_info)
+ __fsverity_cleanup_inode(inode);
+}
/* read_metadata.c */
@@ -158,7 +170,7 @@ int fsverity_ioctl_read_metadata(struct file *filp, const void __user *uarg);
/* verify.c */
-bool fsverity_verify_page(struct page *page);
+bool fsverity_verify_blocks(struct folio *folio, size_t len, size_t offset);
void fsverity_verify_bio(struct bio *bio);
void fsverity_enqueue_verify_work(struct work_struct *work);
@@ -193,15 +205,15 @@ static inline int fsverity_get_digest(struct inode *inode,
/* open.c */
-static inline int fsverity_file_open(struct inode *inode, struct file *filp)
+static inline int __fsverity_file_open(struct inode *inode, struct file *filp)
{
- return IS_VERITY(inode) ? -EOPNOTSUPP : 0;
+ return -EOPNOTSUPP;
}
-static inline int fsverity_prepare_setattr(struct dentry *dentry,
- struct iattr *attr)
+static inline int __fsverity_prepare_setattr(struct dentry *dentry,
+ struct iattr *attr)
{
- return IS_VERITY(d_inode(dentry)) ? -EOPNOTSUPP : 0;
+ return -EOPNOTSUPP;
}
static inline void fsverity_cleanup_inode(struct inode *inode)
@@ -218,7 +230,8 @@ static inline int fsverity_ioctl_read_metadata(struct file *filp,
/* verify.c */
-static inline bool fsverity_verify_page(struct page *page)
+static inline bool fsverity_verify_blocks(struct folio *folio, size_t len,
+ size_t offset)
{
WARN_ON(1);
return false;
@@ -236,6 +249,16 @@ static inline void fsverity_enqueue_verify_work(struct work_struct *work)
#endif /* !CONFIG_FS_VERITY */
+static inline bool fsverity_verify_folio(struct folio *folio)
+{
+ return fsverity_verify_blocks(folio, folio_size(folio), 0);
+}
+
+static inline bool fsverity_verify_page(struct page *page)
+{
+ return fsverity_verify_blocks(page_folio(page), PAGE_SIZE, 0);
+}
+
/**
* fsverity_active() - do reads from the inode need to go through fs-verity?
* @inode: inode to check
@@ -254,4 +277,42 @@ static inline bool fsverity_active(const struct inode *inode)
return fsverity_get_info(inode) != NULL;
}
+/**
+ * fsverity_file_open() - prepare to open a verity file
+ * @inode: the inode being opened
+ * @filp: the struct file being set up
+ *
+ * When opening a verity file, deny the open if it is for writing. Otherwise,
+ * set up the inode's ->i_verity_info if not already done.
+ *
+ * When combined with fscrypt, this must be called after fscrypt_file_open().
+ * Otherwise, we won't have the key set up to decrypt the verity metadata.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+static inline int fsverity_file_open(struct inode *inode, struct file *filp)
+{
+ if (IS_VERITY(inode))
+ return __fsverity_file_open(inode, filp);
+ return 0;
+}
+
+/**
+ * fsverity_prepare_setattr() - prepare to change a verity inode's attributes
+ * @dentry: dentry through which the inode is being changed
+ * @attr: attributes to change
+ *
+ * Verity files are immutable, so deny truncates. This isn't covered by the
+ * open-time check because sys_truncate() takes a path, not a file descriptor.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+static inline int fsverity_prepare_setattr(struct dentry *dentry,
+ struct iattr *attr)
+{
+ if (IS_VERITY(d_inode(dentry)))
+ return __fsverity_prepare_setattr(dentry, attr);
+ return 0;
+}
+
#endif /* _LINUX_FSVERITY_H */
diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h
index 034b1106d022..a3028e400a9c 100644
--- a/include/linux/highmem-internal.h
+++ b/include/linux/highmem-internal.h
@@ -152,7 +152,10 @@ static inline void totalhigh_pages_add(long count)
static inline bool is_kmap_addr(const void *x)
{
unsigned long addr = (unsigned long)x;
- return addr >= PKMAP_ADDR(0) && addr < PKMAP_ADDR(LAST_PKMAP);
+
+ return (addr >= PKMAP_ADDR(0) && addr < PKMAP_ADDR(LAST_PKMAP)) ||
+ (addr >= __fix_to_virt(FIX_KMAP_END) &&
+ addr < __fix_to_virt(FIX_KMAP_BEGIN));
}
#else /* CONFIG_HIGHMEM */
@@ -200,7 +203,7 @@ static inline void *kmap_local_pfn(unsigned long pfn)
static inline void __kunmap_local(const void *addr)
{
#ifdef ARCH_HAS_FLUSH_ON_KUNMAP
- kunmap_flush_on_unmap(addr);
+ kunmap_flush_on_unmap(PTR_ALIGN_DOWN(addr, PAGE_SIZE));
#endif
}
@@ -227,7 +230,7 @@ static inline void *kmap_atomic_pfn(unsigned long pfn)
static inline void __kunmap_atomic(const void *addr)
{
#ifdef ARCH_HAS_FLUSH_ON_KUNMAP
- kunmap_flush_on_unmap(addr);
+ kunmap_flush_on_unmap(PTR_ALIGN_DOWN(addr, PAGE_SIZE));
#endif
pagefault_enable();
if (IS_ENABLED(CONFIG_PREEMPT_RT))
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 551834cd5299..9ab9d3105d5c 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -7,6 +7,7 @@
#include <linux/fs.h>
#include <linux/hugetlb_inline.h>
#include <linux/cgroup.h>
+#include <linux/page_ref.h>
#include <linux/list.h>
#include <linux/kref.h>
#include <linux/pgtable.h>
@@ -742,7 +743,10 @@ static inline struct hstate *hstate_sizelog(int page_size_log)
if (!page_size_log)
return &default_hstate;
- return size_to_hstate(1UL << page_size_log);
+ if (page_size_log < BITS_PER_LONG)
+ return size_to_hstate(1UL << page_size_log);
+
+ return NULL;
}
static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
@@ -1187,6 +1191,18 @@ static inline __init void hugetlb_cma_reserve(int order)
}
#endif
+#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+static inline bool hugetlb_pmd_shared(pte_t *pte)
+{
+ return page_count(virt_to_page(pte)) > 1;
+}
+#else
+static inline bool hugetlb_pmd_shared(pte_t *pte)
+{
+ return false;
+}
+#endif
+
bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr);
#ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
diff --git a/include/linux/ima.h b/include/linux/ima.h
index 5a0b2a285a18..172b113a9864 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -18,7 +18,7 @@ struct linux_binprm;
extern enum hash_algo ima_get_current_hash_algo(void);
extern int ima_bprm_check(struct linux_binprm *bprm);
extern int ima_file_check(struct file *file, int mask);
-extern void ima_post_create_tmpfile(struct user_namespace *mnt_userns,
+extern void ima_post_create_tmpfile(struct mnt_idmap *idmap,
struct inode *inode);
extern void ima_file_free(struct file *file);
extern int ima_file_mmap(struct file *file, unsigned long prot);
@@ -30,7 +30,7 @@ extern int ima_read_file(struct file *file, enum kernel_read_file_id id,
bool contents);
extern int ima_post_read_file(struct file *file, void *buf, loff_t size,
enum kernel_read_file_id id);
-extern void ima_post_path_mknod(struct user_namespace *mnt_userns,
+extern void ima_post_path_mknod(struct mnt_idmap *idmap,
struct dentry *dentry);
extern int ima_file_hash(struct file *file, char *buf, size_t buf_size);
extern int ima_inode_hash(struct inode *inode, char *buf, size_t buf_size);
@@ -66,7 +66,7 @@ static inline int ima_file_check(struct file *file, int mask)
return 0;
}
-static inline void ima_post_create_tmpfile(struct user_namespace *mnt_userns,
+static inline void ima_post_create_tmpfile(struct mnt_idmap *idmap,
struct inode *inode)
{
}
@@ -111,7 +111,7 @@ static inline int ima_post_read_file(struct file *file, void *buf, loff_t size,
return 0;
}
-static inline void ima_post_path_mknod(struct user_namespace *mnt_userns,
+static inline void ima_post_path_mknod(struct mnt_idmap *idmap,
struct dentry *dentry)
{
return;
@@ -183,18 +183,18 @@ static inline void ima_post_key_create_or_update(struct key *keyring,
#ifdef CONFIG_IMA_APPRAISE
extern bool is_ima_appraise_enabled(void);
-extern void ima_inode_post_setattr(struct user_namespace *mnt_userns,
+extern void ima_inode_post_setattr(struct mnt_idmap *idmap,
struct dentry *dentry);
extern int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name,
const void *xattr_value, size_t xattr_value_len);
-extern int ima_inode_set_acl(struct user_namespace *mnt_userns,
+extern int ima_inode_set_acl(struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name,
struct posix_acl *kacl);
-static inline int ima_inode_remove_acl(struct user_namespace *mnt_userns,
+static inline int ima_inode_remove_acl(struct mnt_idmap *idmap,
struct dentry *dentry,
const char *acl_name)
{
- return ima_inode_set_acl(mnt_userns, dentry, acl_name, NULL);
+ return ima_inode_set_acl(idmap, dentry, acl_name, NULL);
}
extern int ima_inode_removexattr(struct dentry *dentry, const char *xattr_name);
#else
@@ -203,7 +203,7 @@ static inline bool is_ima_appraise_enabled(void)
return 0;
}
-static inline void ima_inode_post_setattr(struct user_namespace *mnt_userns,
+static inline void ima_inode_post_setattr(struct mnt_idmap *idmap,
struct dentry *dentry)
{
return;
@@ -217,7 +217,7 @@ static inline int ima_inode_setxattr(struct dentry *dentry,
return 0;
}
-static inline int ima_inode_set_acl(struct user_namespace *mnt_userns,
+static inline int ima_inode_set_acl(struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name,
struct posix_acl *kacl)
{
@@ -231,7 +231,7 @@ static inline int ima_inode_removexattr(struct dentry *dentry,
return 0;
}
-static inline int ima_inode_remove_acl(struct user_namespace *mnt_userns,
+static inline int ima_inode_remove_acl(struct mnt_idmap *idmap,
struct dentry *dentry,
const char *acl_name)
{
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index dcd8a563ab52..128a67a40065 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -292,6 +292,8 @@ struct io_ring_ctx {
struct {
spinlock_t completion_lock;
+ bool poll_multi_queue;
+
/*
* ->iopoll_list is protected by the ctx->uring_lock for
* io_uring instances that don't use IORING_SETUP_SQPOLL.
@@ -300,7 +302,6 @@ struct io_ring_ctx {
*/
struct io_wq_work_list iopoll_list;
struct io_hash_table cancel_table;
- bool poll_multi_queue;
struct llist_head work_llist;
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 0983dfc9a203..fca43a4bd96b 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -58,8 +58,7 @@ struct vm_fault;
#define IOMAP_F_SHARED (1U << 2)
#define IOMAP_F_MERGED (1U << 3)
#define IOMAP_F_BUFFER_HEAD (1U << 4)
-#define IOMAP_F_ZONE_APPEND (1U << 5)
-#define IOMAP_F_XATTR (1U << 6)
+#define IOMAP_F_XATTR (1U << 5)
/*
* Flags set by the core iomap code during operations:
diff --git a/include/linux/iversion.h b/include/linux/iversion.h
index e27bd4f55d84..f174ff1b59ee 100644
--- a/include/linux/iversion.h
+++ b/include/linux/iversion.h
@@ -9,8 +9,26 @@
* ---------------------------
* The change attribute (i_version) is mandated by NFSv4 and is mostly for
* knfsd, but is also used for other purposes (e.g. IMA). The i_version must
- * appear different to observers if there was a change to the inode's data or
- * metadata since it was last queried.
+ * appear larger to observers if there was an explicit change to the inode's
+ * data or metadata since it was last queried.
+ *
+ * An explicit change is one that would ordinarily result in a change to the
+ * inode status change time (aka ctime). i_version must appear to change, even
+ * if the ctime does not (since the whole point is to avoid missing updates due
+ * to timestamp granularity). If POSIX or other relevant spec mandates that the
+ * ctime must change due to an operation, then the i_version counter must be
+ * incremented as well.
+ *
+ * Making the i_version update completely atomic with the operation itself would
+ * be prohibitively expensive. Traditionally the kernel has updated the times on
+ * directories after an operation that changes its contents. For regular files,
+ * the ctime is usually updated before the data is copied into the cache for a
+ * write. This means that there is a window of time when an observer can
+ * associate a new timestamp with old file contents. Since the purpose of the
+ * i_version is to allow for better cache coherency, the i_version must always
+ * be updated after the results of the operation are visible. Updating it before
+ * and after a change is also permitted. (Note that no filesystems currently do
+ * this. Fixing that is a work-in-progress).
*
* Observers see the i_version as a 64-bit number that never decreases. If it
* remains the same since it was last checked, then nothing has changed in the
@@ -234,42 +252,6 @@ inode_peek_iversion(const struct inode *inode)
return inode_peek_iversion_raw(inode) >> I_VERSION_QUERIED_SHIFT;
}
-/**
- * inode_query_iversion - read i_version for later use
- * @inode: inode from which i_version should be read
- *
- * Read the inode i_version counter. This should be used by callers that wish
- * to store the returned i_version for later comparison. This will guarantee
- * that a later query of the i_version will result in a different value if
- * anything has changed.
- *
- * In this implementation, we fetch the current value, set the QUERIED flag and
- * then try to swap it into place with a cmpxchg, if it wasn't already set. If
- * that fails, we try again with the newly fetched value from the cmpxchg.
- */
-static inline u64
-inode_query_iversion(struct inode *inode)
-{
- u64 cur, new;
-
- cur = inode_peek_iversion_raw(inode);
- do {
- /* If flag is already set, then no need to swap */
- if (cur & I_VERSION_QUERIED) {
- /*
- * This barrier (and the implicit barrier in the
- * cmpxchg below) pairs with the barrier in
- * inode_maybe_inc_iversion().
- */
- smp_mb();
- break;
- }
-
- new = cur | I_VERSION_QUERIED;
- } while (!atomic64_try_cmpxchg(&inode->i_version, &cur, new));
- return cur >> I_VERSION_QUERIED_SHIFT;
-}
-
/*
* For filesystems without any sort of change attribute, the best we can
* do is fake one up from the ctime:
@@ -283,6 +265,8 @@ static inline u64 time_to_chattr(struct timespec64 *t)
return chattr;
}
+u64 inode_query_iversion(struct inode *inode);
+
/**
* inode_eq_iversion_raw - check whether the raw i_version counter has changed
* @inode: inode to check
diff --git a/include/linux/key.h b/include/linux/key.h
index d27477faf00d..8dc7f7c3088b 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -386,6 +386,14 @@ extern int wait_for_key_construction(struct key *key, bool intr);
extern int key_validate(const struct key *key);
+extern key_ref_t key_create(key_ref_t keyring,
+ const char *type,
+ const char *description,
+ const void *payload,
+ size_t plen,
+ key_perm_t perm,
+ unsigned long flags);
+
extern key_ref_t key_create_or_update(key_ref_t keyring,
const char *type,
const char *description,
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 70ce419e2709..2b7f067af3c4 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -312,7 +312,7 @@ static inline struct file *nlmsvc_file_file(struct nlm_file *file)
static inline struct inode *nlmsvc_file_inode(struct nlm_file *file)
{
- return locks_inode(nlmsvc_file_file(file));
+ return file_inode(nlmsvc_file_file(file));
}
static inline int __nlm_privileged_request4(const struct sockaddr *sap)
@@ -372,7 +372,7 @@ static inline int nlm_privileged_requester(const struct svc_rqst *rqstp)
static inline int nlm_compare_locks(const struct file_lock *fl1,
const struct file_lock *fl2)
{
- return locks_inode(fl1->fl_file) == locks_inode(fl2->fl_file)
+ return file_inode(fl1->fl_file) == file_inode(fl2->fl_file)
&& fl1->fl_pid == fl2->fl_pid
&& fl1->fl_owner == fl2->fl_owner
&& fl1->fl_start == fl2->fl_start
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
index 67e4a2c5500b..b60fbcd8cdfa 100644
--- a/include/linux/lockd/xdr.h
+++ b/include/linux/lockd/xdr.h
@@ -11,6 +11,7 @@
#define LOCKD_XDR_H
#include <linux/fs.h>
+#include <linux/filelock.h>
#include <linux/nfs.h>
#include <linux/sunrpc/xdr.h>
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index ed6cb2ac55fa..094b76dc7164 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -136,25 +136,25 @@ LSM_HOOK(int, 0, inode_follow_link, struct dentry *dentry, struct inode *inode,
LSM_HOOK(int, 0, inode_permission, struct inode *inode, int mask)
LSM_HOOK(int, 0, inode_setattr, struct dentry *dentry, struct iattr *attr)
LSM_HOOK(int, 0, inode_getattr, const struct path *path)
-LSM_HOOK(int, 0, inode_setxattr, struct user_namespace *mnt_userns,
+LSM_HOOK(int, 0, inode_setxattr, struct mnt_idmap *idmap,
struct dentry *dentry, const char *name, const void *value,
size_t size, int flags)
LSM_HOOK(void, LSM_RET_VOID, inode_post_setxattr, struct dentry *dentry,
const char *name, const void *value, size_t size, int flags)
LSM_HOOK(int, 0, inode_getxattr, struct dentry *dentry, const char *name)
LSM_HOOK(int, 0, inode_listxattr, struct dentry *dentry)
-LSM_HOOK(int, 0, inode_removexattr, struct user_namespace *mnt_userns,
+LSM_HOOK(int, 0, inode_removexattr, struct mnt_idmap *idmap,
struct dentry *dentry, const char *name)
-LSM_HOOK(int, 0, inode_set_acl, struct user_namespace *mnt_userns,
+LSM_HOOK(int, 0, inode_set_acl, struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name, struct posix_acl *kacl)
-LSM_HOOK(int, 0, inode_get_acl, struct user_namespace *mnt_userns,
+LSM_HOOK(int, 0, inode_get_acl, struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name)
-LSM_HOOK(int, 0, inode_remove_acl, struct user_namespace *mnt_userns,
+LSM_HOOK(int, 0, inode_remove_acl, struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name)
LSM_HOOK(int, 0, inode_need_killpriv, struct dentry *dentry)
-LSM_HOOK(int, 0, inode_killpriv, struct user_namespace *mnt_userns,
+LSM_HOOK(int, 0, inode_killpriv, struct mnt_idmap *idmap,
struct dentry *dentry)
-LSM_HOOK(int, -EOPNOTSUPP, inode_getsecurity, struct user_namespace *mnt_userns,
+LSM_HOOK(int, -EOPNOTSUPP, inode_getsecurity, struct mnt_idmap *idmap,
struct inode *inode, const char *name, void **buffer, bool alloc)
LSM_HOOK(int, -EOPNOTSUPP, inode_setsecurity, struct inode *inode,
const char *name, const void *value, size_t size, int flags)
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 0a5ba81f7367..6e156d2acffc 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -475,7 +475,7 @@
* @inode_killpriv:
* The setuid bit is being removed. Remove similar security labels.
* Called with the dentry->d_inode->i_mutex held.
- * @mnt_userns: user namespace of the mount.
+ * @idmap: idmap of the mount.
* @dentry is the dentry being changed.
* Return 0 on success. If error is returned, then the operation
* causing setuid bit removal is failed.
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index d3c8203cab6c..85dc9b88ea37 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1666,10 +1666,13 @@ void mem_cgroup_track_foreign_dirty_slowpath(struct folio *folio,
static inline void mem_cgroup_track_foreign_dirty(struct folio *folio,
struct bdi_writeback *wb)
{
+ struct mem_cgroup *memcg;
+
if (mem_cgroup_disabled())
return;
- if (unlikely(&folio_memcg(folio)->css != wb->memcg_css))
+ memcg = folio_memcg(folio);
+ if (unlikely(memcg && &memcg->css != wb->memcg_css))
mem_cgroup_track_foreign_dirty_slowpath(folio, wb);
}
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 5fe5d198b57a..29d4b201c7b2 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1091,6 +1091,11 @@ enum {
};
enum {
+ MLX5_VPORT_CVLAN_INSERT_WHEN_NO_CVLAN = 0x1,
+ MLX5_VPORT_CVLAN_INSERT_ALWAYS = 0x3,
+};
+
+enum {
MLX5_L3_PROT_TYPE_IPV4 = 0,
MLX5_L3_PROT_TYPE_IPV6 = 1,
};
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index d476255c9a3f..333c1fec72f8 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -315,7 +315,7 @@ struct mlx5_cmd {
struct mlx5_cmd_debug dbg;
struct cmd_msg_cache cache[MLX5_NUM_COMMAND_CACHES];
int checksum_disabled;
- struct mlx5_cmd_stats *stats;
+ struct mlx5_cmd_stats stats[MLX5_CMD_OP_MAX];
};
struct mlx5_cmd_mailbox {
@@ -573,6 +573,14 @@ struct mlx5_debugfs_entries {
struct dentry *lag_debugfs;
};
+enum mlx5_func_type {
+ MLX5_PF,
+ MLX5_VF,
+ MLX5_SF,
+ MLX5_HOST_PF,
+ MLX5_FUNC_TYPE_NUM,
+};
+
struct mlx5_ft_pool;
struct mlx5_priv {
/* IRQ table valid only for real pci devices PF or VF */
@@ -583,11 +591,10 @@ struct mlx5_priv {
struct mlx5_nb pg_nb;
struct workqueue_struct *pg_wq;
struct xarray page_root_xa;
- u32 fw_pages;
atomic_t reg_pages;
struct list_head free_list;
- u32 vfs_pages;
- u32 host_pf_pages;
+ u32 fw_pages;
+ u32 page_counters[MLX5_FUNC_TYPE_NUM];
u32 fw_pages_alloc_failed;
u32 give_pages_dropped;
u32 reclaim_pages_discard;
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index f3d1c62c98dd..a9ee7bc59c90 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -913,7 +913,8 @@ struct mlx5_ifc_e_switch_cap_bits {
u8 vport_svlan_insert[0x1];
u8 vport_cvlan_insert_if_not_exist[0x1];
u8 vport_cvlan_insert_overwrite[0x1];
- u8 reserved_at_5[0x2];
+ u8 reserved_at_5[0x1];
+ u8 vport_cvlan_insert_always[0x1];
u8 esw_shared_ingress_acl[0x1];
u8 esw_uplink_ingress_acl[0x1];
u8 root_ft_on_other_esw[0x1];
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f3f196e4d66d..bd3197748562 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -137,7 +137,7 @@ extern int mmap_rnd_compat_bits __read_mostly;
* define their own version of this macro in <asm/pgtable.h>
*/
#if BITS_PER_LONG == 64
-/* This function must be updated when the size of struct page grows above 80
+/* This function must be updated when the size of struct page grows above 96
* or reduces below 56. The idea that compiler optimizes out switch()
* statement, and only leaves move/store instructions. Also the compiler can
* combine write statements if they are both assignments and can be reordered,
@@ -148,12 +148,18 @@ static inline void __mm_zero_struct_page(struct page *page)
{
unsigned long *_pp = (void *)page;
- /* Check that struct page is either 56, 64, 72, or 80 bytes */
+ /* Check that struct page is either 56, 64, 72, 80, 88 or 96 bytes */
BUILD_BUG_ON(sizeof(struct page) & 7);
BUILD_BUG_ON(sizeof(struct page) < 56);
- BUILD_BUG_ON(sizeof(struct page) > 80);
+ BUILD_BUG_ON(sizeof(struct page) > 96);
switch (sizeof(struct page)) {
+ case 96:
+ _pp[11] = 0;
+ fallthrough;
+ case 88:
+ _pp[10] = 0;
+ fallthrough;
case 80:
_pp[9] = 0;
fallthrough;
@@ -1270,10 +1276,10 @@ static inline void folio_put_refs(struct folio *folio, int refs)
__folio_put(folio);
}
-/**
- * release_pages - release an array of pages or folios
+/*
+ * union release_pages_arg - an array of pages or folios
*
- * This just releases a simple array of multiple pages, and
+ * release_pages() releases a simple array of multiple pages, and
* accepts various different forms of said page array: either
* a regular old boring array of pages, an array of folios, or
* an array of encoded page pointers.
@@ -2095,8 +2101,6 @@ int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc,
struct task_struct *task, bool bypass_rlim);
struct kvec;
-int get_kernel_pages(const struct kvec *iov, int nr_pages, int write,
- struct page **pages);
struct page *get_dump_page(unsigned long addr);
bool folio_mark_dirty(struct folio *folio);
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index e8ed225d8f7c..ff3f3f23f649 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -413,8 +413,7 @@ static inline void free_anon_vma_name(struct vm_area_struct *vma)
* Not using anon_vma_name because it generates a warning if mmap_lock
* is not held, which might be the case here.
*/
- if (!vma->vm_file)
- anon_vma_name_put(vma->anon_name);
+ anon_vma_name_put(vma->anon_name);
}
static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 3b8475007734..9757067c3053 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -581,7 +581,7 @@ struct vm_area_struct {
/*
* For private and shared anonymous mappings, a pointer to a null
* terminated string containing the name given to the vma, or NULL if
- * unnamed. Serialized by mmap_sem. Use anon_vma_name to access.
+ * unnamed. Serialized by mmap_lock. Use anon_vma_name to access.
*/
struct anon_vma_name *anon_name;
#endif
diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h
index 0ccca33a7a6d..057c89867aa2 100644
--- a/include/linux/mnt_idmapping.h
+++ b/include/linux/mnt_idmapping.h
@@ -113,167 +113,23 @@ static inline bool vfsgid_eq_kgid(vfsgid_t vfsgid, kgid_t kgid)
#define AS_KUIDT(val) (kuid_t){ __vfsuid_val(val) }
#define AS_KGIDT(val) (kgid_t){ __vfsgid_val(val) }
-#ifdef CONFIG_MULTIUSER
-/**
- * vfsgid_in_group_p() - check whether a vfsuid matches the caller's groups
- * @vfsgid: the mnt gid to match
- *
- * This function can be used to determine whether @vfsuid matches any of the
- * caller's groups.
- *
- * Return: 1 if vfsuid matches caller's groups, 0 if not.
- */
-static inline int vfsgid_in_group_p(vfsgid_t vfsgid)
-{
- return in_group_p(AS_KGIDT(vfsgid));
-}
-#else
-static inline int vfsgid_in_group_p(vfsgid_t vfsgid)
-{
- return 1;
-}
-#endif
+int vfsgid_in_group_p(vfsgid_t vfsgid);
-/**
- * initial_idmapping - check whether this is the initial mapping
- * @ns: idmapping to check
- *
- * Check whether this is the initial mapping, mapping 0 to 0, 1 to 1,
- * [...], 1000 to 1000 [...].
- *
- * Return: true if this is the initial mapping, false if not.
- */
-static inline bool initial_idmapping(const struct user_namespace *ns)
-{
- return ns == &init_user_ns;
-}
+vfsuid_t make_vfsuid(struct mnt_idmap *idmap,
+ struct user_namespace *fs_userns, kuid_t kuid);
-/**
- * no_idmapping - check whether we can skip remapping a kuid/gid
- * @mnt_userns: the mount's idmapping
- * @fs_userns: the filesystem's idmapping
- *
- * This function can be used to check whether a remapping between two
- * idmappings is required.
- * An idmapped mount is a mount that has an idmapping attached to it that
- * is different from the filsystem's idmapping and the initial idmapping.
- * If the initial mapping is used or the idmapping of the mount and the
- * filesystem are identical no remapping is required.
- *
- * Return: true if remapping can be skipped, false if not.
- */
-static inline bool no_idmapping(const struct user_namespace *mnt_userns,
- const struct user_namespace *fs_userns)
-{
- return initial_idmapping(mnt_userns) || mnt_userns == fs_userns;
-}
+vfsgid_t make_vfsgid(struct mnt_idmap *idmap,
+ struct user_namespace *fs_userns, kgid_t kgid);
-/**
- * make_vfsuid - map a filesystem kuid into a mnt_userns
- * @mnt_userns: the mount's idmapping
- * @fs_userns: the filesystem's idmapping
- * @kuid : kuid to be mapped
- *
- * Take a @kuid and remap it from @fs_userns into @mnt_userns. Use this
- * function when preparing a @kuid to be reported to userspace.
- *
- * If no_idmapping() determines that this is not an idmapped mount we can
- * simply return @kuid unchanged.
- * If initial_idmapping() tells us that the filesystem is not mounted with an
- * idmapping we know the value of @kuid won't change when calling
- * from_kuid() so we can simply retrieve the value via __kuid_val()
- * directly.
- *
- * Return: @kuid mapped according to @mnt_userns.
- * If @kuid has no mapping in either @mnt_userns or @fs_userns INVALID_UID is
- * returned.
- */
+kuid_t from_vfsuid(struct mnt_idmap *idmap,
+ struct user_namespace *fs_userns, vfsuid_t vfsuid);
-static inline vfsuid_t make_vfsuid(struct user_namespace *mnt_userns,
- struct user_namespace *fs_userns,
- kuid_t kuid)
-{
- uid_t uid;
-
- if (no_idmapping(mnt_userns, fs_userns))
- return VFSUIDT_INIT(kuid);
- if (initial_idmapping(fs_userns))
- uid = __kuid_val(kuid);
- else
- uid = from_kuid(fs_userns, kuid);
- if (uid == (uid_t)-1)
- return INVALID_VFSUID;
- return VFSUIDT_INIT(make_kuid(mnt_userns, uid));
-}
-
-/**
- * make_vfsgid - map a filesystem kgid into a mnt_userns
- * @mnt_userns: the mount's idmapping
- * @fs_userns: the filesystem's idmapping
- * @kgid : kgid to be mapped
- *
- * Take a @kgid and remap it from @fs_userns into @mnt_userns. Use this
- * function when preparing a @kgid to be reported to userspace.
- *
- * If no_idmapping() determines that this is not an idmapped mount we can
- * simply return @kgid unchanged.
- * If initial_idmapping() tells us that the filesystem is not mounted with an
- * idmapping we know the value of @kgid won't change when calling
- * from_kgid() so we can simply retrieve the value via __kgid_val()
- * directly.
- *
- * Return: @kgid mapped according to @mnt_userns.
- * If @kgid has no mapping in either @mnt_userns or @fs_userns INVALID_GID is
- * returned.
- */
-
-static inline vfsgid_t make_vfsgid(struct user_namespace *mnt_userns,
- struct user_namespace *fs_userns,
- kgid_t kgid)
-{
- gid_t gid;
-
- if (no_idmapping(mnt_userns, fs_userns))
- return VFSGIDT_INIT(kgid);
- if (initial_idmapping(fs_userns))
- gid = __kgid_val(kgid);
- else
- gid = from_kgid(fs_userns, kgid);
- if (gid == (gid_t)-1)
- return INVALID_VFSGID;
- return VFSGIDT_INIT(make_kgid(mnt_userns, gid));
-}
-
-/**
- * from_vfsuid - map a vfsuid into the filesystem idmapping
- * @mnt_userns: the mount's idmapping
- * @fs_userns: the filesystem's idmapping
- * @vfsuid : vfsuid to be mapped
- *
- * Map @vfsuid into the filesystem idmapping. This function has to be used in
- * order to e.g. write @vfsuid to inode->i_uid.
- *
- * Return: @vfsuid mapped into the filesystem idmapping
- */
-static inline kuid_t from_vfsuid(struct user_namespace *mnt_userns,
- struct user_namespace *fs_userns,
- vfsuid_t vfsuid)
-{
- uid_t uid;
-
- if (no_idmapping(mnt_userns, fs_userns))
- return AS_KUIDT(vfsuid);
- uid = from_kuid(mnt_userns, AS_KUIDT(vfsuid));
- if (uid == (uid_t)-1)
- return INVALID_UID;
- if (initial_idmapping(fs_userns))
- return KUIDT_INIT(uid);
- return make_kuid(fs_userns, uid);
-}
+kgid_t from_vfsgid(struct mnt_idmap *idmap,
+ struct user_namespace *fs_userns, vfsgid_t vfsgid);
/**
* vfsuid_has_fsmapping - check whether a vfsuid maps into the filesystem
- * @mnt_userns: the mount's idmapping
+ * @idmap: the mount's idmapping
* @fs_userns: the filesystem's idmapping
* @vfsuid: vfsuid to be mapped
*
@@ -283,11 +139,11 @@ static inline kuid_t from_vfsuid(struct user_namespace *mnt_userns,
*
* Return: true if @vfsuid has a mapping in the filesystem, false if not.
*/
-static inline bool vfsuid_has_fsmapping(struct user_namespace *mnt_userns,
+static inline bool vfsuid_has_fsmapping(struct mnt_idmap *idmap,
struct user_namespace *fs_userns,
vfsuid_t vfsuid)
{
- return uid_valid(from_vfsuid(mnt_userns, fs_userns, vfsuid));
+ return uid_valid(from_vfsuid(idmap, fs_userns, vfsuid));
}
static inline bool vfsuid_has_mapping(struct user_namespace *userns,
@@ -310,35 +166,8 @@ static inline kuid_t vfsuid_into_kuid(vfsuid_t vfsuid)
}
/**
- * from_vfsgid - map a vfsgid into the filesystem idmapping
- * @mnt_userns: the mount's idmapping
- * @fs_userns: the filesystem's idmapping
- * @vfsgid : vfsgid to be mapped
- *
- * Map @vfsgid into the filesystem idmapping. This function has to be used in
- * order to e.g. write @vfsgid to inode->i_gid.
- *
- * Return: @vfsgid mapped into the filesystem idmapping
- */
-static inline kgid_t from_vfsgid(struct user_namespace *mnt_userns,
- struct user_namespace *fs_userns,
- vfsgid_t vfsgid)
-{
- gid_t gid;
-
- if (no_idmapping(mnt_userns, fs_userns))
- return AS_KGIDT(vfsgid);
- gid = from_kgid(mnt_userns, AS_KGIDT(vfsgid));
- if (gid == (gid_t)-1)
- return INVALID_GID;
- if (initial_idmapping(fs_userns))
- return KGIDT_INIT(gid);
- return make_kgid(fs_userns, gid);
-}
-
-/**
* vfsgid_has_fsmapping - check whether a vfsgid maps into the filesystem
- * @mnt_userns: the mount's idmapping
+ * @idmap: the mount's idmapping
* @fs_userns: the filesystem's idmapping
* @vfsgid: vfsgid to be mapped
*
@@ -348,11 +177,11 @@ static inline kgid_t from_vfsgid(struct user_namespace *mnt_userns,
*
* Return: true if @vfsgid has a mapping in the filesystem, false if not.
*/
-static inline bool vfsgid_has_fsmapping(struct user_namespace *mnt_userns,
+static inline bool vfsgid_has_fsmapping(struct mnt_idmap *idmap,
struct user_namespace *fs_userns,
vfsgid_t vfsgid)
{
- return gid_valid(from_vfsgid(mnt_userns, fs_userns, vfsgid));
+ return gid_valid(from_vfsgid(idmap, fs_userns, vfsgid));
}
static inline bool vfsgid_has_mapping(struct user_namespace *userns,
@@ -375,8 +204,8 @@ static inline kgid_t vfsgid_into_kgid(vfsgid_t vfsgid)
}
/**
- * mapped_fsuid - return caller's fsuid mapped up into a mnt_userns
- * @mnt_userns: the mount's idmapping
+ * mapped_fsuid - return caller's fsuid mapped according to an idmapping
+ * @idmap: the mount's idmapping
* @fs_userns: the filesystem's idmapping
*
* Use this helper to initialize a new vfs or filesystem object based on
@@ -385,18 +214,17 @@ static inline kgid_t vfsgid_into_kgid(vfsgid_t vfsgid)
* O_CREAT. Other examples include the allocation of quotas for a specific
* user.
*
- * Return: the caller's current fsuid mapped up according to @mnt_userns.
+ * Return: the caller's current fsuid mapped up according to @idmap.
*/
-static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns,
+static inline kuid_t mapped_fsuid(struct mnt_idmap *idmap,
struct user_namespace *fs_userns)
{
- return from_vfsuid(mnt_userns, fs_userns,
- VFSUIDT_INIT(current_fsuid()));
+ return from_vfsuid(idmap, fs_userns, VFSUIDT_INIT(current_fsuid()));
}
/**
- * mapped_fsgid - return caller's fsgid mapped up into a mnt_userns
- * @mnt_userns: the mount's idmapping
+ * mapped_fsgid - return caller's fsgid mapped according to an idmapping
+ * @idmap: the mount's idmapping
* @fs_userns: the filesystem's idmapping
*
* Use this helper to initialize a new vfs or filesystem object based on
@@ -405,13 +233,15 @@ static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns,
* O_CREAT. Other examples include the allocation of quotas for a specific
* user.
*
- * Return: the caller's current fsgid mapped up according to @mnt_userns.
+ * Return: the caller's current fsgid mapped up according to @idmap.
*/
-static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns,
+static inline kgid_t mapped_fsgid(struct mnt_idmap *idmap,
struct user_namespace *fs_userns)
{
- return from_vfsgid(mnt_userns, fs_userns,
- VFSGIDT_INIT(current_fsgid()));
+ return from_vfsgid(idmap, fs_userns, VFSGIDT_INIT(current_fsgid()));
}
+bool check_fsmapping(const struct mnt_idmap *idmap,
+ const struct super_block *sb);
+
#endif /* _LINUX_MNT_IDMAPPING_H */
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 62475996fac6..52f452b2259a 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -74,8 +74,6 @@ struct vfsmount {
struct mnt_idmap *mnt_idmap;
} __randomize_layout;
-struct user_namespace *mnt_user_ns(const struct vfsmount *mnt);
-struct user_namespace *mnt_idmap_owner(const struct mnt_idmap *idmap);
static inline struct mnt_idmap *mnt_idmap(const struct vfsmount *mnt)
{
/* Pairs with smp_store_release() in do_idmap_mount(). */
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 25765556223a..a3f8cdca90c8 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -7,7 +7,6 @@
#define __LINUX_MTD_SPI_NOR_H
#include <linux/bitops.h>
-#include <linux/mtd/cfi.h>
#include <linux/mtd/mtd.h>
#include <linux/spi/spi-mem.h>
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 00fee52df842..0d4531fd46e7 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -68,11 +68,11 @@ extern struct dentry *try_lookup_one_len(const char *, struct dentry *, int);
extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int);
extern struct dentry *lookup_positive_unlocked(const char *, struct dentry *, int);
-struct dentry *lookup_one(struct user_namespace *, const char *, struct dentry *, int);
-struct dentry *lookup_one_unlocked(struct user_namespace *mnt_userns,
+struct dentry *lookup_one(struct mnt_idmap *, const char *, struct dentry *, int);
+struct dentry *lookup_one_unlocked(struct mnt_idmap *idmap,
const char *name, struct dentry *base,
int len);
-struct dentry *lookup_one_positive_unlocked(struct user_namespace *mnt_userns,
+struct dentry *lookup_one_positive_unlocked(struct mnt_idmap *idmap,
const char *name,
struct dentry *base, int len);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index aad12a179e54..e6e02184c25a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2839,8 +2839,6 @@ int unregister_netdevice_notifier(struct notifier_block *nb);
int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb);
int unregister_netdevice_notifier_net(struct net *net,
struct notifier_block *nb);
-void move_netdevice_notifier_net(struct net *src_net, struct net *dst_net,
- struct notifier_block *nb);
int register_netdevice_notifier_dev_net(struct net_device *dev,
struct notifier_block *nb,
struct netdev_net_notifier *nn);
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index ab934ad951a8..e8c350a3ade1 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -197,7 +197,7 @@ struct ip_set_region {
};
/* Max range where every element is added/deleted in one step */
-#define IPSET_MAX_RANGE (1<<20)
+#define IPSET_MAX_RANGE (1<<14)
/* The max revision number supported by any set type + 1 */
#define IPSET_REVISION_MAX 9
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index d92fdfd2444c..d6c119e31d7a 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -392,11 +392,11 @@ extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *);
extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr);
extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr);
extern int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr);
-extern int nfs_getattr(struct user_namespace *, const struct path *,
+extern int nfs_getattr(struct mnt_idmap *, const struct path *,
struct kstat *, u32, unsigned int);
extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *, const struct cred *);
extern void nfs_access_set_mask(struct nfs_access_entry *, u32);
-extern int nfs_permission(struct user_namespace *, struct inode *, int);
+extern int nfs_permission(struct mnt_idmap *, struct inode *, int);
extern int nfs_open(struct inode *, struct file *);
extern int nfs_attribute_cache_expired(struct inode *inode);
extern int nfs_revalidate_inode(struct inode *inode, unsigned long flags);
@@ -405,7 +405,7 @@ extern int nfs_clear_invalid_mapping(struct address_space *mapping);
extern bool nfs_mapping_need_revalidate_inode(struct inode *inode);
extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping);
extern int nfs_revalidate_mapping_rcu(struct inode *inode);
-extern int nfs_setattr(struct user_namespace *, struct dentry *, struct iattr *);
+extern int nfs_setattr(struct mnt_idmap *, struct dentry *, struct iattr *);
extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, struct nfs_fattr *);
extern void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr);
extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h
index 50caa117cb62..bb15c9234e21 100644
--- a/include/linux/nvmem-provider.h
+++ b/include/linux/nvmem-provider.h
@@ -70,7 +70,6 @@ struct nvmem_keepout {
* @word_size: Minimum read/write access granularity.
* @stride: Minimum read/write access stride.
* @priv: User context passed to read/write callbacks.
- * @wp-gpio: Write protect pin
* @ignore_wp: Write Protect pin is managed by the provider.
*
* Note: A default "nvmem<id>" name will be assigned to the device if
@@ -85,7 +84,6 @@ struct nvmem_config {
const char *name;
int id;
struct module *owner;
- struct gpio_desc *wp_gpio;
const struct nvmem_cell_info *cells;
int ncells;
const struct nvmem_keepout *keepout;
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 2e677e6ad09f..d7c2d33baa7f 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -301,7 +301,7 @@ static inline bool folio_ref_try_add_rcu(struct folio *folio, int count)
*
* You can also use this function if you're holding a lock that prevents
* pages being frozen & removed; eg the i_pages lock for the page cache
- * or the mmap_sem or page table lock for page tables. In this case,
+ * or the mmap_lock or page table lock for page tables. In this case,
* it will always succeed, and you could have used a plain folio_get(),
* but it's sometimes more convenient to have a common function called
* from both locked and RCU-protected contexts.
diff --git a/include/linux/pci.h b/include/linux/pci.h
index adffd65e84b4..254c8a4126a8 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1621,6 +1621,18 @@ pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
flags, NULL);
}
+static inline struct msi_map pci_msix_alloc_irq_at(struct pci_dev *dev, unsigned int index,
+ const struct irq_affinity_desc *affdesc)
+{
+ struct msi_map map = { .index = -ENOSYS, };
+
+ return map;
+}
+
+static inline void pci_msix_free_irq(struct pci_dev *pdev, struct msi_map map)
+{
+}
+
static inline void pci_free_irq_vectors(struct pci_dev *dev)
{
}
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index ef914a600087..525b5d64e394 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -100,7 +100,6 @@ struct arm_pmu {
void (*stop)(struct arm_pmu *);
void (*reset)(void *);
int (*map_event)(struct perf_event *event);
- bool (*filter)(struct pmu *pmu, int cpu);
int num_events;
bool secure_access; /* 32-bit ARM only */
#define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 71eeb4e3b1fd..6378c997ded5 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -826,10 +826,7 @@ struct phy_driver {
* whether to advertise lower-speed modes for that interface. It is
* assumed that if a rate matching mode is supported on an interface,
* then that interface's rate can be adapted to all slower link speeds
- * supported by the phy. If iface is %PHY_INTERFACE_MODE_NA, and the phy
- * supports any kind of rate matching for any interface, then it must
- * return that rate matching mode (preferring %RATE_MATCH_PAUSE to
- * %RATE_MATCH_CRS). If the interface is not supported, this should
+ * supported by the phy. If the interface is not supported, this should
* return %RATE_MATCH_NONE.
*/
int (*get_rate_matching)(struct phy_device *phydev,
diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h
new file mode 100644
index 000000000000..f9c5ac80d59b
--- /dev/null
+++ b/include/linux/pktcdvd.h
@@ -0,0 +1,197 @@
+/*
+ * Copyright (C) 2000 Jens Axboe <axboe@suse.de>
+ * Copyright (C) 2001-2004 Peter Osterlund <petero2@telia.com>
+ *
+ * May be copied or modified under the terms of the GNU General Public
+ * License. See linux/COPYING for more information.
+ *
+ * Packet writing layer for ATAPI and SCSI CD-R, CD-RW, DVD-R, and
+ * DVD-RW devices.
+ *
+ */
+#ifndef __PKTCDVD_H
+#define __PKTCDVD_H
+
+#include <linux/blkdev.h>
+#include <linux/completion.h>
+#include <linux/cdrom.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <linux/mempool.h>
+#include <uapi/linux/pktcdvd.h>
+
+/* default bio write queue congestion marks */
+#define PKT_WRITE_CONGESTION_ON 10000
+#define PKT_WRITE_CONGESTION_OFF 9000
+
+
+struct packet_settings
+{
+ __u32 size; /* packet size in (512 byte) sectors */
+ __u8 fp; /* fixed packets */
+ __u8 link_loss; /* the rest is specified
+ * as per Mt Fuji */
+ __u8 write_type;
+ __u8 track_mode;
+ __u8 block_mode;
+};
+
+/*
+ * Very crude stats for now
+ */
+struct packet_stats
+{
+ unsigned long pkt_started;
+ unsigned long pkt_ended;
+ unsigned long secs_w;
+ unsigned long secs_rg;
+ unsigned long secs_r;
+};
+
+struct packet_cdrw
+{
+ struct list_head pkt_free_list;
+ struct list_head pkt_active_list;
+ spinlock_t active_list_lock; /* Serialize access to pkt_active_list */
+ struct task_struct *thread;
+ atomic_t pending_bios;
+};
+
+/*
+ * Switch to high speed reading after reading this many kilobytes
+ * with no interspersed writes.
+ */
+#define HI_SPEED_SWITCH 512
+
+struct packet_iosched
+{
+ atomic_t attention; /* Set to non-zero when queue processing is needed */
+ int writing; /* Non-zero when writing, zero when reading */
+ spinlock_t lock; /* Protecting read/write queue manipulations */
+ struct bio_list read_queue;
+ struct bio_list write_queue;
+ sector_t last_write; /* The sector where the last write ended */
+ int successive_reads;
+};
+
+/*
+ * 32 buffers of 2048 bytes
+ */
+#if (PAGE_SIZE % CD_FRAMESIZE) != 0
+#error "PAGE_SIZE must be a multiple of CD_FRAMESIZE"
+#endif
+#define PACKET_MAX_SIZE 128
+#define FRAMES_PER_PAGE (PAGE_SIZE / CD_FRAMESIZE)
+#define PACKET_MAX_SECTORS (PACKET_MAX_SIZE * CD_FRAMESIZE >> 9)
+
+enum packet_data_state {
+ PACKET_IDLE_STATE, /* Not used at the moment */
+ PACKET_WAITING_STATE, /* Waiting for more bios to arrive, so */
+ /* we don't have to do as much */
+ /* data gathering */
+ PACKET_READ_WAIT_STATE, /* Waiting for reads to fill in holes */
+ PACKET_WRITE_WAIT_STATE, /* Waiting for the write to complete */
+ PACKET_RECOVERY_STATE, /* Recover after read/write errors */
+ PACKET_FINISHED_STATE, /* After write has finished */
+
+ PACKET_NUM_STATES /* Number of possible states */
+};
+
+/*
+ * Information needed for writing a single packet
+ */
+struct pktcdvd_device;
+
+struct packet_data
+{
+ struct list_head list;
+
+ spinlock_t lock; /* Lock protecting state transitions and */
+ /* orig_bios list */
+
+ struct bio_list orig_bios; /* Original bios passed to pkt_make_request */
+ /* that will be handled by this packet */
+ int write_size; /* Total size of all bios in the orig_bios */
+ /* list, measured in number of frames */
+
+ struct bio *w_bio; /* The bio we will send to the real CD */
+ /* device once we have all data for the */
+ /* packet we are going to write */
+ sector_t sector; /* First sector in this packet */
+ int frames; /* Number of frames in this packet */
+
+ enum packet_data_state state; /* Current state */
+ atomic_t run_sm; /* Incremented whenever the state */
+ /* machine needs to be run */
+ long sleep_time; /* Set this to non-zero to make the state */
+ /* machine run after this many jiffies. */
+
+ atomic_t io_wait; /* Number of pending IO operations */
+ atomic_t io_errors; /* Number of read/write errors during IO */
+
+ struct bio *r_bios[PACKET_MAX_SIZE]; /* bios to use during data gathering */
+ struct page *pages[PACKET_MAX_SIZE / FRAMES_PER_PAGE];
+
+ int cache_valid; /* If non-zero, the data for the zone defined */
+ /* by the sector variable is completely cached */
+ /* in the pages[] vector. */
+
+ int id; /* ID number for debugging */
+ struct pktcdvd_device *pd;
+};
+
+struct pkt_rb_node {
+ struct rb_node rb_node;
+ struct bio *bio;
+};
+
+struct packet_stacked_data
+{
+ struct bio *bio; /* Original read request bio */
+ struct pktcdvd_device *pd;
+};
+#define PSD_POOL_SIZE 64
+
+struct pktcdvd_device
+{
+ struct block_device *bdev; /* dev attached */
+ dev_t pkt_dev; /* our dev */
+ char name[20];
+ struct packet_settings settings;
+ struct packet_stats stats;
+ int refcnt; /* Open count */
+ int write_speed; /* current write speed, kB/s */
+ int read_speed; /* current read speed, kB/s */
+ unsigned long offset; /* start offset */
+ __u8 mode_offset; /* 0 / 8 */
+ __u8 type;
+ unsigned long flags;
+ __u16 mmc3_profile;
+ __u32 nwa; /* next writable address */
+ __u32 lra; /* last recorded address */
+ struct packet_cdrw cdrw;
+ wait_queue_head_t wqueue;
+
+ spinlock_t lock; /* Serialize access to bio_queue */
+ struct rb_root bio_queue; /* Work queue of bios we need to handle */
+ int bio_queue_size; /* Number of nodes in bio_queue */
+ bool congested; /* Someone is waiting for bio_queue_size
+ * to drop. */
+ sector_t current_sector; /* Keep track of where the elevator is */
+ atomic_t scan_queue; /* Set to non-zero when pkt_handle_queue */
+ /* needs to be run. */
+ mempool_t rb_pool; /* mempool for pkt_rb_node allocations */
+
+ struct packet_iosched iosched;
+ struct gendisk *disk;
+
+ int write_congestion_off;
+ int write_congestion_on;
+
+ struct device *dev; /* sysfs pktcdvd[0-7] dev */
+
+ struct dentry *dfs_d_root; /* debugfs: devname directory */
+ struct dentry *dfs_f_info; /* debugfs: info file */
+};
+
+#endif /* __PKTCDVD_H */
diff --git a/include/linux/platform_data/x86/simatic-ipc.h b/include/linux/platform_data/x86/simatic-ipc.h
index 632320ec8f08..a48bb5240977 100644
--- a/include/linux/platform_data/x86/simatic-ipc.h
+++ b/include/linux/platform_data/x86/simatic-ipc.h
@@ -32,7 +32,8 @@ enum simatic_ipc_station_ids {
SIMATIC_IPC_IPC477E = 0x00000A02,
SIMATIC_IPC_IPC127E = 0x00000D01,
SIMATIC_IPC_IPC227G = 0x00000F01,
- SIMATIC_IPC_IPC427G = 0x00001001,
+ SIMATIC_IPC_IPCBX_39A = 0x00001001,
+ SIMATIC_IPC_IPCPX_39A = 0x00001002,
};
static inline u32 simatic_ipc_get_station_id(u8 *data, int max_len)
diff --git a/include/linux/poison.h b/include/linux/poison.h
index 2d3249eb0e62..0e8a1f2ceb2f 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -84,4 +84,7 @@
/********** kernel/bpf/ **********/
#define BPF_PTR_POISON ((void *)(0xeB9FUL + POISON_POINTER_DELTA))
+/********** VFS **********/
+#define VFS_PTR_POISON ((void *)(0xF5 + POISON_POINTER_DELTA))
+
#endif
diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h
index ee608d22ecb9..21cc29b8a9e8 100644
--- a/include/linux/posix_acl.h
+++ b/include/linux/posix_acl.h
@@ -69,20 +69,20 @@ extern int __posix_acl_create(struct posix_acl **, gfp_t, umode_t *);
extern int __posix_acl_chmod(struct posix_acl **, gfp_t, umode_t);
extern struct posix_acl *get_posix_acl(struct inode *, int);
-int set_posix_acl(struct user_namespace *, struct dentry *, int,
+int set_posix_acl(struct mnt_idmap *, struct dentry *, int,
struct posix_acl *);
struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type);
struct posix_acl *posix_acl_clone(const struct posix_acl *acl, gfp_t flags);
#ifdef CONFIG_FS_POSIX_ACL
-int posix_acl_chmod(struct user_namespace *, struct dentry *, umode_t);
+int posix_acl_chmod(struct mnt_idmap *, struct dentry *, umode_t);
extern int posix_acl_create(struct inode *, umode_t *, struct posix_acl **,
struct posix_acl **);
-int posix_acl_update_mode(struct user_namespace *, struct inode *, umode_t *,
+int posix_acl_update_mode(struct mnt_idmap *, struct inode *, umode_t *,
struct posix_acl **);
-int simple_set_acl(struct user_namespace *, struct dentry *,
+int simple_set_acl(struct mnt_idmap *, struct dentry *,
struct posix_acl *, int);
extern int simple_acl_create(struct inode *, struct inode *);
@@ -91,7 +91,7 @@ void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl);
void forget_cached_acl(struct inode *inode, int type);
void forget_all_cached_acls(struct inode *inode);
int posix_acl_valid(struct user_namespace *, const struct posix_acl *);
-int posix_acl_permission(struct user_namespace *, struct inode *,
+int posix_acl_permission(struct mnt_idmap *, struct inode *,
const struct posix_acl *, int);
static inline void cache_no_acl(struct inode *inode)
@@ -100,14 +100,14 @@ static inline void cache_no_acl(struct inode *inode)
inode->i_default_acl = NULL;
}
-int vfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+int vfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
const char *acl_name, struct posix_acl *kacl);
-struct posix_acl *vfs_get_acl(struct user_namespace *mnt_userns,
+struct posix_acl *vfs_get_acl(struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name);
-int vfs_remove_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+int vfs_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry,
const char *acl_name);
#else
-static inline int posix_acl_chmod(struct user_namespace *mnt_userns,
+static inline int posix_acl_chmod(struct mnt_idmap *idmap,
struct dentry *dentry, umode_t mode)
{
return 0;
@@ -134,21 +134,21 @@ static inline void forget_all_cached_acls(struct inode *inode)
{
}
-static inline int vfs_set_acl(struct user_namespace *mnt_userns,
+static inline int vfs_set_acl(struct mnt_idmap *idmap,
struct dentry *dentry, const char *name,
struct posix_acl *acl)
{
return -EOPNOTSUPP;
}
-static inline struct posix_acl *vfs_get_acl(struct user_namespace *mnt_userns,
+static inline struct posix_acl *vfs_get_acl(struct mnt_idmap *idmap,
struct dentry *dentry,
const char *acl_name)
{
return ERR_PTR(-EOPNOTSUPP);
}
-static inline int vfs_remove_acl(struct user_namespace *mnt_userns,
+static inline int vfs_remove_acl(struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name)
{
return -EOPNOTSUPP;
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 0d8625d71733..11a4becff3a9 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -20,12 +20,12 @@ static inline struct quota_info *sb_dqopt(struct super_block *sb)
}
/* i_mutex must being held */
-static inline bool is_quota_modification(struct user_namespace *mnt_userns,
+static inline bool is_quota_modification(struct mnt_idmap *idmap,
struct inode *inode, struct iattr *ia)
{
return ((ia->ia_valid & ATTR_SIZE) ||
- i_uid_needs_update(mnt_userns, ia, inode) ||
- i_gid_needs_update(mnt_userns, ia, inode));
+ i_uid_needs_update(idmap, ia, inode) ||
+ i_gid_needs_update(idmap, ia, inode));
}
#if defined(CONFIG_QUOTA)
@@ -116,7 +116,7 @@ int dquot_set_dqblk(struct super_block *sb, struct kqid id,
struct qc_dqblk *di);
int __dquot_transfer(struct inode *inode, struct dquot **transfer_to);
-int dquot_transfer(struct user_namespace *mnt_userns, struct inode *inode,
+int dquot_transfer(struct mnt_idmap *idmap, struct inode *inode,
struct iattr *iattr);
static inline struct mem_dqinfo *sb_dqinfo(struct super_block *sb, int type)
@@ -236,7 +236,7 @@ static inline void dquot_free_inode(struct inode *inode)
{
}
-static inline int dquot_transfer(struct user_namespace *mnt_userns,
+static inline int dquot_transfer(struct mnt_idmap *idmap,
struct inode *inode, struct iattr *iattr)
{
return 0;
diff --git a/include/linux/security.h b/include/linux/security.h
index 5b67f208f7de..5984d0d550b4 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -153,12 +153,11 @@ extern int cap_capset(struct cred *new, const struct cred *old,
extern int cap_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file);
int cap_inode_setxattr(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags);
-int cap_inode_removexattr(struct user_namespace *mnt_userns,
+int cap_inode_removexattr(struct mnt_idmap *idmap,
struct dentry *dentry, const char *name);
int cap_inode_need_killpriv(struct dentry *dentry);
-int cap_inode_killpriv(struct user_namespace *mnt_userns,
- struct dentry *dentry);
-int cap_inode_getsecurity(struct user_namespace *mnt_userns,
+int cap_inode_killpriv(struct mnt_idmap *idmap, struct dentry *dentry);
+int cap_inode_getsecurity(struct mnt_idmap *idmap,
struct inode *inode, const char *name, void **buffer,
bool alloc);
extern int cap_mmap_addr(unsigned long addr);
@@ -356,29 +355,28 @@ int security_inode_readlink(struct dentry *dentry);
int security_inode_follow_link(struct dentry *dentry, struct inode *inode,
bool rcu);
int security_inode_permission(struct inode *inode, int mask);
-int security_inode_setattr(struct user_namespace *mnt_userns,
+int security_inode_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *attr);
int security_inode_getattr(const struct path *path);
-int security_inode_setxattr(struct user_namespace *mnt_userns,
+int security_inode_setxattr(struct mnt_idmap *idmap,
struct dentry *dentry, const char *name,
const void *value, size_t size, int flags);
-int security_inode_set_acl(struct user_namespace *mnt_userns,
+int security_inode_set_acl(struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name,
struct posix_acl *kacl);
-int security_inode_get_acl(struct user_namespace *mnt_userns,
+int security_inode_get_acl(struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name);
-int security_inode_remove_acl(struct user_namespace *mnt_userns,
+int security_inode_remove_acl(struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name);
void security_inode_post_setxattr(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags);
int security_inode_getxattr(struct dentry *dentry, const char *name);
int security_inode_listxattr(struct dentry *dentry);
-int security_inode_removexattr(struct user_namespace *mnt_userns,
+int security_inode_removexattr(struct mnt_idmap *idmap,
struct dentry *dentry, const char *name);
int security_inode_need_killpriv(struct dentry *dentry);
-int security_inode_killpriv(struct user_namespace *mnt_userns,
- struct dentry *dentry);
-int security_inode_getsecurity(struct user_namespace *mnt_userns,
+int security_inode_killpriv(struct mnt_idmap *idmap, struct dentry *dentry);
+int security_inode_getsecurity(struct mnt_idmap *idmap,
struct inode *inode, const char *name,
void **buffer, bool alloc);
int security_inode_setsecurity(struct inode *inode, const char *name, const void *value, size_t size, int flags);
@@ -862,7 +860,7 @@ static inline int security_inode_permission(struct inode *inode, int mask)
return 0;
}
-static inline int security_inode_setattr(struct user_namespace *mnt_userns,
+static inline int security_inode_setattr(struct mnt_idmap *idmap,
struct dentry *dentry,
struct iattr *attr)
{
@@ -874,14 +872,14 @@ static inline int security_inode_getattr(const struct path *path)
return 0;
}
-static inline int security_inode_setxattr(struct user_namespace *mnt_userns,
+static inline int security_inode_setxattr(struct mnt_idmap *idmap,
struct dentry *dentry, const char *name, const void *value,
size_t size, int flags)
{
return cap_inode_setxattr(dentry, name, value, size, flags);
}
-static inline int security_inode_set_acl(struct user_namespace *mnt_userns,
+static inline int security_inode_set_acl(struct mnt_idmap *idmap,
struct dentry *dentry,
const char *acl_name,
struct posix_acl *kacl)
@@ -889,14 +887,14 @@ static inline int security_inode_set_acl(struct user_namespace *mnt_userns,
return 0;
}
-static inline int security_inode_get_acl(struct user_namespace *mnt_userns,
+static inline int security_inode_get_acl(struct mnt_idmap *idmap,
struct dentry *dentry,
const char *acl_name)
{
return 0;
}
-static inline int security_inode_remove_acl(struct user_namespace *mnt_userns,
+static inline int security_inode_remove_acl(struct mnt_idmap *idmap,
struct dentry *dentry,
const char *acl_name)
{
@@ -918,11 +916,11 @@ static inline int security_inode_listxattr(struct dentry *dentry)
return 0;
}
-static inline int security_inode_removexattr(struct user_namespace *mnt_userns,
+static inline int security_inode_removexattr(struct mnt_idmap *idmap,
struct dentry *dentry,
const char *name)
{
- return cap_inode_removexattr(mnt_userns, dentry, name);
+ return cap_inode_removexattr(idmap, dentry, name);
}
static inline int security_inode_need_killpriv(struct dentry *dentry)
@@ -930,18 +928,18 @@ static inline int security_inode_need_killpriv(struct dentry *dentry)
return cap_inode_need_killpriv(dentry);
}
-static inline int security_inode_killpriv(struct user_namespace *mnt_userns,
+static inline int security_inode_killpriv(struct mnt_idmap *idmap,
struct dentry *dentry)
{
- return cap_inode_killpriv(mnt_userns, dentry);
+ return cap_inode_killpriv(idmap, dentry);
}
-static inline int security_inode_getsecurity(struct user_namespace *mnt_userns,
+static inline int security_inode_getsecurity(struct mnt_idmap *idmap,
struct inode *inode,
const char *name, void **buffer,
bool alloc)
{
- return cap_inode_getsecurity(mnt_userns, inode, name, buffer, alloc);
+ return cap_inode_getsecurity(idmap, inode, name, buffer, alloc);
}
static inline int security_inode_setsecurity(struct inode *inode, const char *name, const void *value, size_t size, int flags)
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 71310efe2fab..7bde8e1c228a 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -107,7 +107,7 @@ extern void synchronize_shrinkers(void);
#ifdef CONFIG_SHRINKER_DEBUG
extern int shrinker_debugfs_add(struct shrinker *shrinker);
-extern void shrinker_debugfs_remove(struct shrinker *shrinker);
+extern struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker);
extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker,
const char *fmt, ...);
#else /* CONFIG_SHRINKER_DEBUG */
@@ -115,8 +115,9 @@ static inline int shrinker_debugfs_add(struct shrinker *shrinker)
{
return 0;
}
-static inline void shrinker_debugfs_remove(struct shrinker *shrinker)
+static inline struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker)
{
+ return NULL;
}
static inline __printf(2, 3)
int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...)
diff --git a/include/linux/soc/ti/omap1-io.h b/include/linux/soc/ti/omap1-io.h
index f7f12728d4a6..9a60f45899d3 100644
--- a/include/linux/soc/ti/omap1-io.h
+++ b/include/linux/soc/ti/omap1-io.h
@@ -5,7 +5,7 @@
#ifndef __ASSEMBLER__
#include <linux/types.h>
-#ifdef CONFIG_ARCH_OMAP1_ANY
+#ifdef CONFIG_ARCH_OMAP1
/*
* NOTE: Please use ioremap + __raw_read/write where possible instead of these
*/
@@ -15,7 +15,7 @@ extern u32 omap_readl(u32 pa);
extern void omap_writeb(u8 v, u32 pa);
extern void omap_writew(u16 v, u32 pa);
extern void omap_writel(u32 v, u32 pa);
-#else
+#elif defined(CONFIG_COMPILE_TEST)
static inline u8 omap_readb(u32 pa) { return 0; }
static inline u16 omap_readw(u32 pa) { return 0; }
static inline u32 omap_readl(u32 pa) { return 0; }
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 1341f7d62da4..be48f1cb1878 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -476,6 +476,15 @@ extern int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock,
#define atomic_dec_and_lock_irqsave(atomic, lock, flags) \
__cond_lock(lock, _atomic_dec_and_lock_irqsave(atomic, lock, &(flags)))
+extern int _atomic_dec_and_raw_lock(atomic_t *atomic, raw_spinlock_t *lock);
+#define atomic_dec_and_raw_lock(atomic, lock) \
+ __cond_lock(lock, _atomic_dec_and_raw_lock(atomic, lock))
+
+extern int _atomic_dec_and_raw_lock_irqsave(atomic_t *atomic, raw_spinlock_t *lock,
+ unsigned long *flags);
+#define atomic_dec_and_raw_lock_irqsave(atomic, lock, flags) \
+ __cond_lock(lock, _atomic_dec_and_raw_lock_irqsave(atomic, lock, &(flags)))
+
int __alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask,
size_t max_size, unsigned int cpu_mult,
gfp_t gfp, const char *name,
diff --git a/include/linux/stat.h b/include/linux/stat.h
index ff277ced50e9..52150570d37a 100644
--- a/include/linux/stat.h
+++ b/include/linux/stat.h
@@ -52,6 +52,15 @@ struct kstat {
u64 mnt_id;
u32 dio_mem_align;
u32 dio_offset_align;
+ u64 change_cookie;
};
+/* These definitions are internal to the kernel for now. Mainly used by nfsd. */
+
+/* mask values */
+#define STATX_CHANGE_COOKIE 0x40000000U /* Want/got stx_change_attr */
+
+/* file attribute values */
+#define STATX_ATTR_CHANGE_MONOTONIC 0x8000000000000000ULL /* version monotonically increases */
+
#endif
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 83ca2e8eb6b5..a152678b82b7 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -252,6 +252,7 @@ struct plat_stmmacenet_data {
int rss_en;
int mac_port_sel_speed;
bool en_tx_lpi_clockgating;
+ bool rx_clk_runs_in_lpi;
int has_xgmac;
bool vlan_fail_q_en;
u8 vlan_fail_q;
diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
index cd188a527d16..3b35b6f6533a 100644
--- a/include/linux/sunrpc/rpc_pipe_fs.h
+++ b/include/linux/sunrpc/rpc_pipe_fs.h
@@ -92,6 +92,11 @@ extern ssize_t rpc_pipe_generic_upcall(struct file *, struct rpc_pipe_msg *,
char __user *, size_t);
extern int rpc_queue_upcall(struct rpc_pipe *, struct rpc_pipe_msg *);
+/* returns true if the msg is in-flight, i.e., already eaten by the peer */
+static inline bool rpc_msg_is_inflight(const struct rpc_pipe_msg *msg) {
+ return (msg->copied != 0 && list_empty(&msg->list));
+}
+
struct rpc_clnt;
extern struct dentry *rpc_create_client_dir(struct dentry *, const char *, struct rpc_clnt *);
extern int rpc_remove_client_dir(struct rpc_clnt *);
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 2787b84eaf12..0ceed49516ad 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -418,8 +418,7 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_pages,
gfp_t gfp_mask,
- unsigned int reclaim_options,
- nodemask_t *nodemask);
+ unsigned int reclaim_options);
extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem,
gfp_t gfp_mask, bool noswap,
pg_data_t *pgdat,
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index dfeb25a0362d..4dc97b9f65fb 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -265,6 +265,7 @@ enum tpm2_startup_types {
enum tpm2_cc_attrs {
TPM2_CC_ATTR_CHANDLES = 25,
TPM2_CC_ATTR_RHANDLE = 28,
+ TPM2_CC_ATTR_VENDOR = 29,
};
#define TPM_VID_INTEL 0x8086
diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h
index 20c0ff54b7a0..7d68a5cc5881 100644
--- a/include/linux/tpm_eventlog.h
+++ b/include/linux/tpm_eventlog.h
@@ -198,8 +198,8 @@ static __always_inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *ev
* The loop below will unmap these fields if the log is larger than
* one page, so save them here for reference:
*/
- count = READ_ONCE(event->count);
- event_type = READ_ONCE(event->event_type);
+ count = event->count;
+ event_type = event->event_type;
/* Verify that it's the log header */
if (event_header->pcr_idx != 0 ||
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 4342e996bcdb..0e373222a6df 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -270,6 +270,7 @@ struct trace_event_fields {
const int align;
const int is_signed;
const int filter_type;
+ const int len;
};
int (*define_fields)(struct trace_event_call *);
};
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 7d5325d47c45..86d1c8e79566 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -267,16 +267,15 @@ static inline void *usb_get_intfdata(struct usb_interface *intf)
}
/**
- * usb_set_intfdata() - associate driver-specific data with the interface
- * @intf: the usb interface
- * @data: pointer to the device priv structure or %NULL
+ * usb_set_intfdata() - associate driver-specific data with an interface
+ * @intf: USB interface
+ * @data: driver data
*
- * Drivers should use this function in their probe() to associate their
- * driver-specific data with the usb interface.
+ * Drivers can use this function in their probe() callbacks to associate
+ * driver-specific data with an interface.
*
- * When disconnecting, the core will take care of setting @intf back to %NULL,
- * so no actions are needed on the driver side. The interface should not be set
- * to %NULL before all actions completed (e.g. no outsanding URB remaining).
+ * Note that there is generally no need to clear the driver-data pointer even
+ * if some drivers do so for historical or implementation-specific reasons.
*/
static inline void usb_set_intfdata(struct usb_interface *intf, void *data)
{
@@ -774,11 +773,14 @@ extern struct device *usb_intf_get_dma_device(struct usb_interface *intf);
extern int usb_acpi_set_power_state(struct usb_device *hdev, int index,
bool enable);
extern bool usb_acpi_power_manageable(struct usb_device *hdev, int index);
+extern int usb_acpi_port_lpm_incapable(struct usb_device *hdev, int index);
#else
static inline int usb_acpi_set_power_state(struct usb_device *hdev, int index,
bool enable) { return 0; }
static inline bool usb_acpi_power_manageable(struct usb_device *hdev, int index)
{ return true; }
+static inline int usb_acpi_port_lpm_incapable(struct usb_device *hdev, int index)
+ { return 0; }
#endif
/* USB autosuspend and autoresume */
diff --git a/include/linux/util_macros.h b/include/linux/util_macros.h
index 72299f261b25..43db6e47503c 100644
--- a/include/linux/util_macros.h
+++ b/include/linux/util_macros.h
@@ -38,4 +38,16 @@
*/
#define find_closest_descending(x, a, as) __find_closest(x, a, as, >=)
+/**
+ * is_insidevar - check if the @ptr points inside the @var memory range.
+ * @ptr: the pointer to a memory address.
+ * @var: the variable which address and size identify the memory range.
+ *
+ * Evaluates to true if the address in @ptr lies within the memory
+ * range allocated to @var.
+ */
+#define is_insidevar(ptr, var) \
+ ((uintptr_t)(ptr) >= (uintptr_t)(var) && \
+ (uintptr_t)(ptr) < (uintptr_t)(var) + sizeof(var))
+
#endif
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index 2e7dd44926e4..6af72461397d 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -42,7 +42,7 @@ struct xattr_handler {
struct inode *inode, const char *name, void *buffer,
size_t size);
int (*set)(const struct xattr_handler *,
- struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct mnt_idmap *idmap, struct dentry *dentry,
struct inode *inode, const char *name, const void *buffer,
size_t size, int flags);
};
@@ -56,25 +56,25 @@ struct xattr {
};
ssize_t __vfs_getxattr(struct dentry *, struct inode *, const char *, void *, size_t);
-ssize_t vfs_getxattr(struct user_namespace *, struct dentry *, const char *,
+ssize_t vfs_getxattr(struct mnt_idmap *, struct dentry *, const char *,
void *, size_t);
ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size);
-int __vfs_setxattr(struct user_namespace *, struct dentry *, struct inode *,
+int __vfs_setxattr(struct mnt_idmap *, struct dentry *, struct inode *,
const char *, const void *, size_t, int);
-int __vfs_setxattr_noperm(struct user_namespace *, struct dentry *,
+int __vfs_setxattr_noperm(struct mnt_idmap *, struct dentry *,
const char *, const void *, size_t, int);
-int __vfs_setxattr_locked(struct user_namespace *, struct dentry *,
+int __vfs_setxattr_locked(struct mnt_idmap *, struct dentry *,
const char *, const void *, size_t, int,
struct inode **);
-int vfs_setxattr(struct user_namespace *, struct dentry *, const char *,
+int vfs_setxattr(struct mnt_idmap *, struct dentry *, const char *,
const void *, size_t, int);
-int __vfs_removexattr(struct user_namespace *, struct dentry *, const char *);
-int __vfs_removexattr_locked(struct user_namespace *, struct dentry *,
+int __vfs_removexattr(struct mnt_idmap *, struct dentry *, const char *);
+int __vfs_removexattr_locked(struct mnt_idmap *, struct dentry *,
const char *, struct inode **);
-int vfs_removexattr(struct user_namespace *, struct dentry *, const char *);
+int vfs_removexattr(struct mnt_idmap *, struct dentry *, const char *);
ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size);
-int vfs_getxattr_alloc(struct user_namespace *mnt_userns,
+int vfs_getxattr_alloc(struct mnt_idmap *idmap,
struct dentry *dentry, const char *name,
char **xattr_value, size_t size, gfp_t flags);
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index d5a5ae926380..ba717eac0229 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -15,6 +15,7 @@ struct key;
struct sock;
struct socket;
struct rxrpc_call;
+enum rxrpc_abort_reason;
enum rxrpc_interruptibility {
RXRPC_INTERRUPTIBLE, /* Call is interruptible */
@@ -55,7 +56,7 @@ int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *,
int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *,
struct iov_iter *, size_t *, bool, u32 *, u16 *);
bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *,
- u32, int, const char *);
+ u32, int, enum rxrpc_abort_reason);
void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *);
void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *,
struct sockaddr_rxrpc *);
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 69174093078f..99bd823e97f6 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -108,6 +108,10 @@ struct inet_bind2_bucket {
struct hlist_node node;
/* List of sockets hashed to this bucket */
struct hlist_head owners;
+ /* bhash has twsk in owners, but bhash2 has twsk in
+ * deathrow not to add a member in struct sock_common.
+ */
+ struct hlist_head deathrow;
};
static inline struct net *ib_net(const struct inet_bind_bucket *ib)
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 5b47545f22d3..4a8e578405cb 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -73,9 +73,14 @@ struct inet_timewait_sock {
u32 tw_priority;
struct timer_list tw_timer;
struct inet_bind_bucket *tw_tb;
+ struct inet_bind2_bucket *tw_tb2;
+ struct hlist_node tw_bind2_node;
};
#define tw_tclass tw_tos
+#define twsk_for_each_bound_bhash2(__tw, list) \
+ hlist_for_each_entry(__tw, list, tw_bind2_node)
+
static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk)
{
return (struct inet_timewait_sock *)sk;
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 689da327ce2e..e3235b9c02c2 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1832,8 +1832,6 @@ struct ieee80211_vif_cfg {
* @drv_priv: data area for driver use, will always be aligned to
* sizeof(void \*).
* @txq: the multicast data TX queue
- * @txqs_stopped: per AC flag to indicate that intermediate TXQs are stopped,
- * protected by fq->lock.
* @offload_flags: 802.3 -> 802.11 enapsulation offload flags, see
* &enum ieee80211_offload_flags.
* @mbssid_tx_vif: Pointer to the transmitting interface if MBSSID is enabled.
@@ -1863,8 +1861,6 @@ struct ieee80211_vif {
bool probe_req_reg;
bool rx_mcast_action_reg;
- bool txqs_stopped[IEEE80211_NUM_ACS];
-
struct ieee80211_vif *mbssid_tx_vif;
/* must be last */
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index b3ba04615caa..56189e4252da 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -336,9 +336,12 @@ struct gdma_queue_spec {
};
};
+#define MANA_IRQ_NAME_SZ 32
+
struct gdma_irq_context {
void (*handler)(void *arg);
void *arg;
+ char name[MANA_IRQ_NAME_SZ];
};
struct gdma_context {
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index e69ce23566ea..9430128aae99 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -312,17 +312,29 @@ struct nft_set_iter {
/**
* struct nft_set_desc - description of set elements
*
+ * @ktype: key type
* @klen: key length
+ * @dtype: data type
* @dlen: data length
+ * @objtype: object type
+ * @flags: flags
* @size: number of set elements
+ * @policy: set policy
+ * @gc_int: garbage collector interval
* @field_len: length of each field in concatenation, bytes
* @field_count: number of concatenated fields in element
* @expr: set must support for expressions
*/
struct nft_set_desc {
+ u32 ktype;
unsigned int klen;
+ u32 dtype;
unsigned int dlen;
+ u32 objtype;
unsigned int size;
+ u32 policy;
+ u32 gc_int;
+ u64 timeout;
u8 field_len[NFT_REG32_COUNT];
u8 field_count;
bool expr;
@@ -585,7 +597,9 @@ void *nft_set_catchall_gc(const struct nft_set *set);
static inline unsigned long nft_set_gc_interval(const struct nft_set *set)
{
- return set->gc_int ? msecs_to_jiffies(set->gc_int) : HZ;
+ u32 gc_int = READ_ONCE(set->gc_int);
+
+ return gc_int ? msecs_to_jiffies(gc_int) : HZ;
}
/**
@@ -1558,6 +1572,9 @@ struct nft_trans_rule {
struct nft_trans_set {
struct nft_set *set;
u32 set_id;
+ u32 gc_int;
+ u64 timeout;
+ bool update;
bool bound;
};
@@ -1567,6 +1584,12 @@ struct nft_trans_set {
(((struct nft_trans_set *)trans->data)->set_id)
#define nft_trans_set_bound(trans) \
(((struct nft_trans_set *)trans->data)->bound)
+#define nft_trans_set_update(trans) \
+ (((struct nft_trans_set *)trans->data)->update)
+#define nft_trans_set_timeout(trans) \
+ (((struct nft_trans_set *)trans->data)->timeout)
+#define nft_trans_set_gc_int(trans) \
+ (((struct nft_trans_set *)trans->data)->gc_int)
struct nft_trans_chain {
bool update;
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index d5517719af4e..af4aa66aaa4e 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -1288,4 +1288,11 @@ void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx);
int sch_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb));
+/* Make sure qdisc is no longer in SCHED state. */
+static inline void qdisc_synchronize(const struct Qdisc *q)
+{
+ while (test_bit(__QDISC_STATE_SCHED, &q->state))
+ msleep(1);
+}
+
#endif
diff --git a/include/net/sock.h b/include/net/sock.h
index dcd72e6285b2..556209727633 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2434,6 +2434,19 @@ static inline __must_check bool skb_set_owner_sk_safe(struct sk_buff *skb, struc
return false;
}
+static inline struct sk_buff *skb_clone_and_charge_r(struct sk_buff *skb, struct sock *sk)
+{
+ skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
+ if (skb) {
+ if (sk_rmem_schedule(sk, skb, skb->truesize)) {
+ skb_set_owner_r(skb, sk);
+ return skb;
+ }
+ __kfree_skb(skb);
+ }
+ return NULL;
+}
+
static inline void skb_prepare_for_gro(struct sk_buff *skb)
{
if (skb->destructor != sock_wfree) {
diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h
index ceed2fc089ff..d323fffb839a 100644
--- a/include/net/tc_wrapper.h
+++ b/include/net/tc_wrapper.h
@@ -216,6 +216,8 @@ skip:
return tp->classify(skb, tp, res);
}
+#endif /* CONFIG_NET_CLS */
+
static inline void tc_wrapper_init(void)
{
#ifdef CONFIG_X86
@@ -224,8 +226,6 @@ static inline void tc_wrapper_init(void)
#endif
}
-#endif /* CONFIG_NET_CLS */
-
#else
#define TC_INDIRECT_SCOPE static
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index 695eebc6f2c8..e39fb0736ade 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -422,6 +422,8 @@ extern int iscsi_host_get_max_scsi_cmds(struct Scsi_Host *shost,
extern struct iscsi_cls_session *
iscsi_session_setup(struct iscsi_transport *, struct Scsi_Host *shost,
uint16_t, int, int, uint32_t, unsigned int);
+void iscsi_session_remove(struct iscsi_cls_session *cls_session);
+void iscsi_session_free(struct iscsi_cls_session *cls_session);
extern void iscsi_session_teardown(struct iscsi_cls_session *);
extern void iscsi_session_recovery_timedout(struct iscsi_cls_session *);
extern int iscsi_set_param(struct iscsi_cls_conn *cls_conn,
diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
index cab52b0f11d0..34c03707fb6e 100644
--- a/include/scsi/scsi_transport_iscsi.h
+++ b/include/scsi/scsi_transport_iscsi.h
@@ -236,6 +236,14 @@ enum {
ISCSI_SESSION_FREE,
};
+enum {
+ ISCSI_SESSION_TARGET_UNBOUND,
+ ISCSI_SESSION_TARGET_ALLOCATED,
+ ISCSI_SESSION_TARGET_SCANNED,
+ ISCSI_SESSION_TARGET_UNBINDING,
+ ISCSI_SESSION_TARGET_MAX,
+};
+
#define ISCSI_MAX_TARGET -1
struct iscsi_cls_session {
@@ -264,6 +272,7 @@ struct iscsi_cls_session {
*/
pid_t creator;
int state;
+ int target_state; /* session target bind state */
int sid; /* session id */
void *dd_data; /* LLD private data */
struct device dev; /* sysfs transport/container device */
diff --git a/include/soc/bcm2835/raspberrypi-firmware.h b/include/soc/bcm2835/raspberrypi-firmware.h
index ab955591cb72..73cac8d0287e 100644
--- a/include/soc/bcm2835/raspberrypi-firmware.h
+++ b/include/soc/bcm2835/raspberrypi-firmware.h
@@ -170,7 +170,7 @@ struct rpi_firmware_clk_rate_request {
#define RPI_FIRMWARE_CLK_RATE_REQUEST(_id) \
{ \
- .id = _id, \
+ .id = cpu_to_le32(_id), \
}
#if IS_ENABLED(CONFIG_RASPBERRYPI_FIRMWARE)
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 0bce0b4ff2fa..75d7d22c3a27 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -32,6 +32,7 @@ struct prelim_ref;
struct btrfs_space_info;
struct btrfs_raid_bio;
struct raid56_bio_trace_info;
+struct find_free_extent_ctl;
#define show_ref_type(type) \
__print_symbolic(type, \
@@ -98,7 +99,7 @@ struct raid56_bio_trace_info;
EM( FLUSH_DELALLOC_WAIT, "FLUSH_DELALLOC_WAIT") \
EM( FLUSH_DELALLOC_FULL, "FLUSH_DELALLOC_FULL") \
EM( FLUSH_DELAYED_REFS_NR, "FLUSH_DELAYED_REFS_NR") \
- EM( FLUSH_DELAYED_REFS, "FLUSH_ELAYED_REFS") \
+ EM( FLUSH_DELAYED_REFS, "FLUSH_DELAYED_REFS") \
EM( ALLOC_CHUNK, "ALLOC_CHUNK") \
EM( ALLOC_CHUNK_FORCE, "ALLOC_CHUNK_FORCE") \
EM( RUN_DELAYED_IPUTS, "RUN_DELAYED_IPUTS") \
@@ -1241,76 +1242,156 @@ DEFINE_EVENT(btrfs__reserved_extent, btrfs_reserved_extent_free,
TRACE_EVENT(find_free_extent,
- TP_PROTO(const struct btrfs_root *root, u64 num_bytes,
- u64 empty_size, u64 data),
+ TP_PROTO(const struct btrfs_root *root,
+ const struct find_free_extent_ctl *ffe_ctl),
- TP_ARGS(root, num_bytes, empty_size, data),
+ TP_ARGS(root, ffe_ctl),
TP_STRUCT__entry_btrfs(
__field( u64, root_objectid )
__field( u64, num_bytes )
__field( u64, empty_size )
- __field( u64, data )
+ __field( u64, flags )
),
TP_fast_assign_btrfs(root->fs_info,
__entry->root_objectid = root->root_key.objectid;
- __entry->num_bytes = num_bytes;
- __entry->empty_size = empty_size;
- __entry->data = data;
+ __entry->num_bytes = ffe_ctl->num_bytes;
+ __entry->empty_size = ffe_ctl->empty_size;
+ __entry->flags = ffe_ctl->flags;
),
TP_printk_btrfs("root=%llu(%s) len=%llu empty_size=%llu flags=%llu(%s)",
show_root_type(__entry->root_objectid),
- __entry->num_bytes, __entry->empty_size, __entry->data,
- __print_flags((unsigned long)__entry->data, "|",
+ __entry->num_bytes, __entry->empty_size, __entry->flags,
+ __print_flags((unsigned long)__entry->flags, "|",
+ BTRFS_GROUP_FLAGS))
+);
+
+TRACE_EVENT(find_free_extent_search_loop,
+
+ TP_PROTO(const struct btrfs_root *root,
+ const struct find_free_extent_ctl *ffe_ctl),
+
+ TP_ARGS(root, ffe_ctl),
+
+ TP_STRUCT__entry_btrfs(
+ __field( u64, root_objectid )
+ __field( u64, num_bytes )
+ __field( u64, empty_size )
+ __field( u64, flags )
+ __field( u64, loop )
+ ),
+
+ TP_fast_assign_btrfs(root->fs_info,
+ __entry->root_objectid = root->root_key.objectid;
+ __entry->num_bytes = ffe_ctl->num_bytes;
+ __entry->empty_size = ffe_ctl->empty_size;
+ __entry->flags = ffe_ctl->flags;
+ __entry->loop = ffe_ctl->loop;
+ ),
+
+ TP_printk_btrfs("root=%llu(%s) len=%llu empty_size=%llu flags=%llu(%s) loop=%llu",
+ show_root_type(__entry->root_objectid),
+ __entry->num_bytes, __entry->empty_size, __entry->flags,
+ __print_flags((unsigned long)__entry->flags, "|", BTRFS_GROUP_FLAGS),
+ __entry->loop)
+);
+
+TRACE_EVENT(find_free_extent_have_block_group,
+
+ TP_PROTO(const struct btrfs_root *root,
+ const struct find_free_extent_ctl *ffe_ctl,
+ const struct btrfs_block_group *block_group),
+
+ TP_ARGS(root, ffe_ctl, block_group),
+
+ TP_STRUCT__entry_btrfs(
+ __field( u64, root_objectid )
+ __field( u64, num_bytes )
+ __field( u64, empty_size )
+ __field( u64, flags )
+ __field( u64, loop )
+ __field( bool, hinted )
+ __field( u64, bg_start )
+ __field( u64, bg_flags )
+ ),
+
+ TP_fast_assign_btrfs(root->fs_info,
+ __entry->root_objectid = root->root_key.objectid;
+ __entry->num_bytes = ffe_ctl->num_bytes;
+ __entry->empty_size = ffe_ctl->empty_size;
+ __entry->flags = ffe_ctl->flags;
+ __entry->loop = ffe_ctl->loop;
+ __entry->hinted = ffe_ctl->hinted;
+ __entry->bg_start = block_group->start;
+ __entry->bg_flags = block_group->flags;
+ ),
+
+ TP_printk_btrfs(
+"root=%llu(%s) len=%llu empty_size=%llu flags=%llu(%s) loop=%llu hinted=%d block_group=%llu bg_flags=%llu(%s)",
+ show_root_type(__entry->root_objectid),
+ __entry->num_bytes, __entry->empty_size, __entry->flags,
+ __print_flags((unsigned long)__entry->flags, "|", BTRFS_GROUP_FLAGS),
+ __entry->loop, __entry->hinted,
+ __entry->bg_start, __entry->bg_flags,
+ __print_flags((unsigned long)__entry->bg_flags, "|",
BTRFS_GROUP_FLAGS))
);
DECLARE_EVENT_CLASS(btrfs__reserve_extent,
- TP_PROTO(const struct btrfs_block_group *block_group, u64 start,
- u64 len),
+ TP_PROTO(const struct btrfs_block_group *block_group,
+ const struct find_free_extent_ctl *ffe_ctl),
- TP_ARGS(block_group, start, len),
+ TP_ARGS(block_group, ffe_ctl),
TP_STRUCT__entry_btrfs(
__field( u64, bg_objectid )
__field( u64, flags )
+ __field( int, bg_size_class )
__field( u64, start )
__field( u64, len )
+ __field( u64, loop )
+ __field( bool, hinted )
+ __field( int, size_class )
),
TP_fast_assign_btrfs(block_group->fs_info,
__entry->bg_objectid = block_group->start;
__entry->flags = block_group->flags;
- __entry->start = start;
- __entry->len = len;
+ __entry->bg_size_class = block_group->size_class;
+ __entry->start = ffe_ctl->search_start;
+ __entry->len = ffe_ctl->num_bytes;
+ __entry->loop = ffe_ctl->loop;
+ __entry->hinted = ffe_ctl->hinted;
+ __entry->size_class = ffe_ctl->size_class;
),
- TP_printk_btrfs("root=%llu(%s) block_group=%llu flags=%llu(%s) "
- "start=%llu len=%llu",
+ TP_printk_btrfs(
+"root=%llu(%s) block_group=%llu flags=%llu(%s) bg_size_class=%d start=%llu len=%llu loop=%llu hinted=%d size_class=%d",
show_root_type(BTRFS_EXTENT_TREE_OBJECTID),
__entry->bg_objectid,
__entry->flags, __print_flags((unsigned long)__entry->flags,
"|", BTRFS_GROUP_FLAGS),
- __entry->start, __entry->len)
+ __entry->bg_size_class, __entry->start, __entry->len,
+ __entry->loop, __entry->hinted, __entry->size_class)
);
DEFINE_EVENT(btrfs__reserve_extent, btrfs_reserve_extent,
- TP_PROTO(const struct btrfs_block_group *block_group, u64 start,
- u64 len),
+ TP_PROTO(const struct btrfs_block_group *block_group,
+ const struct find_free_extent_ctl *ffe_ctl),
- TP_ARGS(block_group, start, len)
+ TP_ARGS(block_group, ffe_ctl)
);
DEFINE_EVENT(btrfs__reserve_extent, btrfs_reserve_extent_cluster,
- TP_PROTO(const struct btrfs_block_group *block_group, u64 start,
- u64 len),
+ TP_PROTO(const struct btrfs_block_group *block_group,
+ const struct find_free_extent_ctl *ffe_ctl),
- TP_ARGS(block_group, start, len)
+ TP_ARGS(block_group, ffe_ctl)
);
TRACE_EVENT(btrfs_find_cluster,
diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h
index 4f4c44ea3a65..cf4a0d28b178 100644
--- a/include/trace/events/erofs.h
+++ b/include/trace/events/erofs.h
@@ -19,12 +19,17 @@ struct erofs_map_blocks;
{ 1, "DIR" })
#define show_map_flags(flags) __print_flags(flags, "|", \
- { EROFS_GET_BLOCKS_RAW, "RAW" })
+ { EROFS_GET_BLOCKS_FIEMAP, "FIEMAP" }, \
+ { EROFS_GET_BLOCKS_READMORE, "READMORE" }, \
+ { EROFS_GET_BLOCKS_FINDTAIL, "FINDTAIL" })
#define show_mflags(flags) __print_flags(flags, "", \
- { EROFS_MAP_MAPPED, "M" }, \
- { EROFS_MAP_META, "I" }, \
- { EROFS_MAP_ENCODED, "E" })
+ { EROFS_MAP_MAPPED, "M" }, \
+ { EROFS_MAP_META, "I" }, \
+ { EROFS_MAP_ENCODED, "E" }, \
+ { EROFS_MAP_FULL_MAPPED, "F" }, \
+ { EROFS_MAP_FRAGMENT, "R" }, \
+ { EROFS_MAP_PARTIAL_REF, "P" })
TRACE_EVENT(erofs_lookup,
@@ -66,8 +71,8 @@ TRACE_EVENT(erofs_fill_inode,
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->nid = EROFS_I(inode)->nid;
- __entry->blkaddr = erofs_blknr(iloc(EROFS_I_SB(inode), __entry->nid));
- __entry->ofs = erofs_blkoff(iloc(EROFS_I_SB(inode), __entry->nid));
+ __entry->blkaddr = erofs_blknr(erofs_iloc(inode));
+ __entry->ofs = erofs_blkoff(erofs_iloc(inode));
),
TP_printk("dev = (%d,%d), nid = %llu, blkaddr %u ofs %u",
diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index c6cfed00d0c6..283db0ea3db4 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -16,7 +16,107 @@
/*
* Declare tracing information enums and their string mappings for display.
*/
+#define rxrpc_abort_reasons \
+ /* AFS errors */ \
+ EM(afs_abort_general_error, "afs-error") \
+ EM(afs_abort_interrupted, "afs-intr") \
+ EM(afs_abort_oom, "afs-oom") \
+ EM(afs_abort_op_not_supported, "afs-op-notsupp") \
+ EM(afs_abort_probeuuid_negative, "afs-probeuuid-neg") \
+ EM(afs_abort_send_data_error, "afs-send-data") \
+ EM(afs_abort_unmarshal_error, "afs-unmarshal") \
+ /* rxperf errors */ \
+ EM(rxperf_abort_general_error, "rxperf-error") \
+ EM(rxperf_abort_oom, "rxperf-oom") \
+ EM(rxperf_abort_op_not_supported, "rxperf-op-notsupp") \
+ EM(rxperf_abort_unmarshal_error, "rxperf-unmarshal") \
+ /* RxKAD security errors */ \
+ EM(rxkad_abort_1_short_check, "rxkad1-short-check") \
+ EM(rxkad_abort_1_short_data, "rxkad1-short-data") \
+ EM(rxkad_abort_1_short_encdata, "rxkad1-short-encdata") \
+ EM(rxkad_abort_1_short_header, "rxkad1-short-hdr") \
+ EM(rxkad_abort_2_short_check, "rxkad2-short-check") \
+ EM(rxkad_abort_2_short_data, "rxkad2-short-data") \
+ EM(rxkad_abort_2_short_header, "rxkad2-short-hdr") \
+ EM(rxkad_abort_2_short_len, "rxkad2-short-len") \
+ EM(rxkad_abort_bad_checksum, "rxkad2-bad-cksum") \
+ EM(rxkad_abort_chall_key_expired, "rxkad-chall-key-exp") \
+ EM(rxkad_abort_chall_level, "rxkad-chall-level") \
+ EM(rxkad_abort_chall_no_key, "rxkad-chall-nokey") \
+ EM(rxkad_abort_chall_short, "rxkad-chall-short") \
+ EM(rxkad_abort_chall_version, "rxkad-chall-version") \
+ EM(rxkad_abort_resp_bad_callid, "rxkad-resp-bad-callid") \
+ EM(rxkad_abort_resp_bad_checksum, "rxkad-resp-bad-cksum") \
+ EM(rxkad_abort_resp_bad_param, "rxkad-resp-bad-param") \
+ EM(rxkad_abort_resp_call_ctr, "rxkad-resp-call-ctr") \
+ EM(rxkad_abort_resp_call_state, "rxkad-resp-call-state") \
+ EM(rxkad_abort_resp_key_expired, "rxkad-resp-key-exp") \
+ EM(rxkad_abort_resp_key_rejected, "rxkad-resp-key-rej") \
+ EM(rxkad_abort_resp_level, "rxkad-resp-level") \
+ EM(rxkad_abort_resp_nokey, "rxkad-resp-nokey") \
+ EM(rxkad_abort_resp_ooseq, "rxkad-resp-ooseq") \
+ EM(rxkad_abort_resp_short, "rxkad-resp-short") \
+ EM(rxkad_abort_resp_short_tkt, "rxkad-resp-short-tkt") \
+ EM(rxkad_abort_resp_tkt_aname, "rxkad-resp-tk-aname") \
+ EM(rxkad_abort_resp_tkt_expired, "rxkad-resp-tk-exp") \
+ EM(rxkad_abort_resp_tkt_future, "rxkad-resp-tk-future") \
+ EM(rxkad_abort_resp_tkt_inst, "rxkad-resp-tk-inst") \
+ EM(rxkad_abort_resp_tkt_len, "rxkad-resp-tk-len") \
+ EM(rxkad_abort_resp_tkt_realm, "rxkad-resp-tk-realm") \
+ EM(rxkad_abort_resp_tkt_short, "rxkad-resp-tk-short") \
+ EM(rxkad_abort_resp_tkt_sinst, "rxkad-resp-tk-sinst") \
+ EM(rxkad_abort_resp_tkt_sname, "rxkad-resp-tk-sname") \
+ EM(rxkad_abort_resp_unknown_tkt, "rxkad-resp-unknown-tkt") \
+ EM(rxkad_abort_resp_version, "rxkad-resp-version") \
+ /* rxrpc errors */ \
+ EM(rxrpc_abort_call_improper_term, "call-improper-term") \
+ EM(rxrpc_abort_call_reset, "call-reset") \
+ EM(rxrpc_abort_call_sendmsg, "call-sendmsg") \
+ EM(rxrpc_abort_call_sock_release, "call-sock-rel") \
+ EM(rxrpc_abort_call_sock_release_tba, "call-sock-rel-tba") \
+ EM(rxrpc_abort_call_timeout, "call-timeout") \
+ EM(rxrpc_abort_no_service_key, "no-serv-key") \
+ EM(rxrpc_abort_nomem, "nomem") \
+ EM(rxrpc_abort_service_not_offered, "serv-not-offered") \
+ EM(rxrpc_abort_shut_down, "shut-down") \
+ EM(rxrpc_abort_unsupported_security, "unsup-sec") \
+ EM(rxrpc_badmsg_bad_abort, "bad-abort") \
+ EM(rxrpc_badmsg_bad_jumbo, "bad-jumbo") \
+ EM(rxrpc_badmsg_short_ack, "short-ack") \
+ EM(rxrpc_badmsg_short_ack_info, "short-ack-info") \
+ EM(rxrpc_badmsg_short_hdr, "short-hdr") \
+ EM(rxrpc_badmsg_unsupported_packet, "unsup-pkt") \
+ EM(rxrpc_badmsg_zero_call, "zero-call") \
+ EM(rxrpc_badmsg_zero_seq, "zero-seq") \
+ EM(rxrpc_badmsg_zero_service, "zero-service") \
+ EM(rxrpc_eproto_ackr_outside_window, "ackr-out-win") \
+ EM(rxrpc_eproto_ackr_sack_overflow, "ackr-sack-over") \
+ EM(rxrpc_eproto_ackr_short_sack, "ackr-short-sack") \
+ EM(rxrpc_eproto_ackr_zero, "ackr-zero") \
+ EM(rxrpc_eproto_bad_upgrade, "bad-upgrade") \
+ EM(rxrpc_eproto_data_after_last, "data-after-last") \
+ EM(rxrpc_eproto_different_last, "diff-last") \
+ EM(rxrpc_eproto_early_reply, "early-reply") \
+ EM(rxrpc_eproto_improper_term, "improper-term") \
+ EM(rxrpc_eproto_no_client_call, "no-cl-call") \
+ EM(rxrpc_eproto_no_client_conn, "no-cl-conn") \
+ EM(rxrpc_eproto_no_service_call, "no-sv-call") \
+ EM(rxrpc_eproto_reupgrade, "re-upgrade") \
+ EM(rxrpc_eproto_rxnull_challenge, "rxnull-chall") \
+ EM(rxrpc_eproto_rxnull_response, "rxnull-resp") \
+ EM(rxrpc_eproto_tx_rot_last, "tx-rot-last") \
+ EM(rxrpc_eproto_unexpected_ack, "unex-ack") \
+ EM(rxrpc_eproto_unexpected_ackall, "unex-ackall") \
+ EM(rxrpc_eproto_unexpected_implicit_end, "unex-impl-end") \
+ EM(rxrpc_eproto_unexpected_reply, "unex-reply") \
+ EM(rxrpc_eproto_wrong_security, "wrong-sec") \
+ EM(rxrpc_recvmsg_excess_data, "recvmsg-excess") \
+ EM(rxrpc_recvmsg_short_data, "recvmsg-short") \
+ E_(rxrpc_sendmsg_late_send, "sendmsg-late")
+
#define rxrpc_call_poke_traces \
+ EM(rxrpc_call_poke_abort, "Abort") \
+ EM(rxrpc_call_poke_complete, "Compl") \
EM(rxrpc_call_poke_error, "Error") \
EM(rxrpc_call_poke_idle, "Idle") \
EM(rxrpc_call_poke_start, "Start") \
@@ -26,6 +126,7 @@
#define rxrpc_skb_traces \
EM(rxrpc_skb_eaten_by_unshare, "ETN unshare ") \
EM(rxrpc_skb_eaten_by_unshare_nomem, "ETN unshar-nm") \
+ EM(rxrpc_skb_get_conn_secured, "GET conn-secd") \
EM(rxrpc_skb_get_conn_work, "GET conn-work") \
EM(rxrpc_skb_get_local_work, "GET locl-work") \
EM(rxrpc_skb_get_reject_work, "GET rej-work ") \
@@ -35,6 +136,7 @@
EM(rxrpc_skb_new_error_report, "NEW error-rpt") \
EM(rxrpc_skb_new_jumbo_subpacket, "NEW jumbo-sub") \
EM(rxrpc_skb_new_unshared, "NEW unshared ") \
+ EM(rxrpc_skb_put_conn_secured, "PUT conn-secd") \
EM(rxrpc_skb_put_conn_work, "PUT conn-work") \
EM(rxrpc_skb_put_error_report, "PUT error-rep") \
EM(rxrpc_skb_put_input, "PUT input ") \
@@ -76,7 +178,6 @@
#define rxrpc_peer_traces \
EM(rxrpc_peer_free, "FREE ") \
EM(rxrpc_peer_get_accept, "GET accept ") \
- EM(rxrpc_peer_get_activate_call, "GET act-call") \
EM(rxrpc_peer_get_bundle, "GET bundle ") \
EM(rxrpc_peer_get_client_conn, "GET cln-conn") \
EM(rxrpc_peer_get_input, "GET input ") \
@@ -89,7 +190,6 @@
EM(rxrpc_peer_put_bundle, "PUT bundle ") \
EM(rxrpc_peer_put_call, "PUT call ") \
EM(rxrpc_peer_put_conn, "PUT conn ") \
- EM(rxrpc_peer_put_discard_tmp, "PUT disc-tmp") \
EM(rxrpc_peer_put_input, "PUT input ") \
EM(rxrpc_peer_put_input_error, "PUT inpt-err") \
E_(rxrpc_peer_put_keepalive, "PUT keepaliv")
@@ -99,6 +199,7 @@
EM(rxrpc_bundle_get_client_call, "GET clt-call") \
EM(rxrpc_bundle_get_client_conn, "GET clt-conn") \
EM(rxrpc_bundle_get_service_conn, "GET svc-conn") \
+ EM(rxrpc_bundle_put_call, "PUT call ") \
EM(rxrpc_bundle_put_conn, "PUT conn ") \
EM(rxrpc_bundle_put_discard, "PUT discard ") \
E_(rxrpc_bundle_new, "NEW ")
@@ -109,14 +210,14 @@
EM(rxrpc_conn_get_call_input, "GET inp-call") \
EM(rxrpc_conn_get_conn_input, "GET inp-conn") \
EM(rxrpc_conn_get_idle, "GET idle ") \
- EM(rxrpc_conn_get_poke, "GET poke ") \
+ EM(rxrpc_conn_get_poke_abort, "GET pk-abort") \
+ EM(rxrpc_conn_get_poke_timer, "GET poke ") \
EM(rxrpc_conn_get_service_conn, "GET svc-conn") \
EM(rxrpc_conn_new_client, "NEW client ") \
EM(rxrpc_conn_new_service, "NEW service ") \
EM(rxrpc_conn_put_call, "PUT call ") \
EM(rxrpc_conn_put_call_input, "PUT inp-call") \
EM(rxrpc_conn_put_conn_input, "PUT inp-conn") \
- EM(rxrpc_conn_put_discard, "PUT discard ") \
EM(rxrpc_conn_put_discard_idle, "PUT disc-idl") \
EM(rxrpc_conn_put_local_dead, "PUT loc-dead") \
EM(rxrpc_conn_put_noreuse, "PUT noreuse ") \
@@ -124,10 +225,10 @@
EM(rxrpc_conn_put_service_reaped, "PUT svc-reap") \
EM(rxrpc_conn_put_unbundle, "PUT unbundle") \
EM(rxrpc_conn_put_unidle, "PUT unidle ") \
+ EM(rxrpc_conn_put_work, "PUT work ") \
EM(rxrpc_conn_queue_challenge, "QUE chall ") \
EM(rxrpc_conn_queue_retry_work, "QUE retry-wk") \
EM(rxrpc_conn_queue_rx_work, "QUE rx-work ") \
- EM(rxrpc_conn_queue_timer, "QUE timer ") \
EM(rxrpc_conn_see_new_service_conn, "SEE new-svc ") \
EM(rxrpc_conn_see_reap_service, "SEE reap-svc") \
E_(rxrpc_conn_see_work, "SEE work ")
@@ -138,16 +239,16 @@
EM(rxrpc_client_chan_activate, "ChActv") \
EM(rxrpc_client_chan_disconnect, "ChDisc") \
EM(rxrpc_client_chan_pass, "ChPass") \
- EM(rxrpc_client_chan_wait_failed, "ChWtFl") \
EM(rxrpc_client_cleanup, "Clean ") \
EM(rxrpc_client_discard, "Discar") \
- EM(rxrpc_client_duplicate, "Duplic") \
EM(rxrpc_client_exposed, "Expose") \
EM(rxrpc_client_replace, "Replac") \
+ EM(rxrpc_client_queue_new_call, "Q-Call") \
EM(rxrpc_client_to_active, "->Actv") \
E_(rxrpc_client_to_idle, "->Idle")
#define rxrpc_call_traces \
+ EM(rxrpc_call_get_io_thread, "GET iothread") \
EM(rxrpc_call_get_input, "GET input ") \
EM(rxrpc_call_get_kernel_service, "GET krnl-srv") \
EM(rxrpc_call_get_notify_socket, "GET notify ") \
@@ -160,6 +261,7 @@
EM(rxrpc_call_new_prealloc_service, "NEW prealloc") \
EM(rxrpc_call_put_discard_prealloc, "PUT disc-pre") \
EM(rxrpc_call_put_discard_error, "PUT disc-err") \
+ EM(rxrpc_call_put_io_thread, "PUT iothread") \
EM(rxrpc_call_put_input, "PUT input ") \
EM(rxrpc_call_put_kernel, "PUT kernel ") \
EM(rxrpc_call_put_poke, "PUT poke ") \
@@ -169,10 +271,12 @@
EM(rxrpc_call_put_sendmsg, "PUT sendmsg ") \
EM(rxrpc_call_put_unnotify, "PUT unnotify") \
EM(rxrpc_call_put_userid_exists, "PUT u-exists") \
+ EM(rxrpc_call_put_userid, "PUT user-id ") \
EM(rxrpc_call_see_accept, "SEE accept ") \
EM(rxrpc_call_see_activate_client, "SEE act-clnt") \
EM(rxrpc_call_see_connect_failed, "SEE con-fail") \
EM(rxrpc_call_see_connected, "SEE connect ") \
+ EM(rxrpc_call_see_disconnected, "SEE disconn ") \
EM(rxrpc_call_see_distribute_error, "SEE dist-err") \
EM(rxrpc_call_see_input, "SEE input ") \
EM(rxrpc_call_see_release, "SEE release ") \
@@ -376,6 +480,7 @@
#define EM(a, b) a,
#define E_(a, b) a
+enum rxrpc_abort_reason { rxrpc_abort_reasons } __mode(byte);
enum rxrpc_bundle_trace { rxrpc_bundle_traces } __mode(byte);
enum rxrpc_call_poke_trace { rxrpc_call_poke_traces } __mode(byte);
enum rxrpc_call_trace { rxrpc_call_traces } __mode(byte);
@@ -404,9 +509,13 @@ enum rxrpc_txqueue_trace { rxrpc_txqueue_traces } __mode(byte);
*/
#undef EM
#undef E_
+
+#ifndef RXRPC_TRACE_ONLY_DEFINE_ENUMS
+
#define EM(a, b) TRACE_DEFINE_ENUM(a);
#define E_(a, b) TRACE_DEFINE_ENUM(a);
+rxrpc_abort_reasons;
rxrpc_bundle_traces;
rxrpc_call_poke_traces;
rxrpc_call_traces;
@@ -657,14 +766,14 @@ TRACE_EVENT(rxrpc_rx_done,
);
TRACE_EVENT(rxrpc_abort,
- TP_PROTO(unsigned int call_nr, const char *why, u32 cid, u32 call_id,
- rxrpc_seq_t seq, int abort_code, int error),
+ TP_PROTO(unsigned int call_nr, enum rxrpc_abort_reason why,
+ u32 cid, u32 call_id, rxrpc_seq_t seq, int abort_code, int error),
TP_ARGS(call_nr, why, cid, call_id, seq, abort_code, error),
TP_STRUCT__entry(
__field(unsigned int, call_nr )
- __array(char, why, 4 )
+ __field(enum rxrpc_abort_reason, why )
__field(u32, cid )
__field(u32, call_id )
__field(rxrpc_seq_t, seq )
@@ -673,8 +782,8 @@ TRACE_EVENT(rxrpc_abort,
),
TP_fast_assign(
- memcpy(__entry->why, why, 4);
__entry->call_nr = call_nr;
+ __entry->why = why;
__entry->cid = cid;
__entry->call_id = call_id;
__entry->abort_code = abort_code;
@@ -685,7 +794,8 @@ TRACE_EVENT(rxrpc_abort,
TP_printk("c=%08x %08x:%08x s=%u a=%d e=%d %s",
__entry->call_nr,
__entry->cid, __entry->call_id, __entry->seq,
- __entry->abort_code, __entry->error, __entry->why)
+ __entry->abort_code, __entry->error,
+ __print_symbolic(__entry->why, rxrpc_abort_reasons))
);
TRACE_EVENT(rxrpc_call_complete,
@@ -1062,10 +1172,10 @@ TRACE_EVENT(rxrpc_receive,
);
TRACE_EVENT(rxrpc_recvmsg,
- TP_PROTO(struct rxrpc_call *call, enum rxrpc_recvmsg_trace why,
+ TP_PROTO(unsigned int call_debug_id, enum rxrpc_recvmsg_trace why,
int ret),
- TP_ARGS(call, why, ret),
+ TP_ARGS(call_debug_id, why, ret),
TP_STRUCT__entry(
__field(unsigned int, call )
@@ -1074,7 +1184,7 @@ TRACE_EVENT(rxrpc_recvmsg,
),
TP_fast_assign(
- __entry->call = call ? call->debug_id : 0;
+ __entry->call = call_debug_id;
__entry->why = why;
__entry->ret = ret;
),
@@ -1521,30 +1631,6 @@ TRACE_EVENT(rxrpc_improper_term,
__entry->abort_code)
);
-TRACE_EVENT(rxrpc_rx_eproto,
- TP_PROTO(struct rxrpc_call *call, rxrpc_serial_t serial,
- const char *why),
-
- TP_ARGS(call, serial, why),
-
- TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(rxrpc_serial_t, serial )
- __field(const char *, why )
- ),
-
- TP_fast_assign(
- __entry->call = call ? call->debug_id : 0;
- __entry->serial = serial;
- __entry->why = why;
- ),
-
- TP_printk("c=%08x EPROTO %08x %s",
- __entry->call,
- __entry->serial,
- __entry->why)
- );
-
TRACE_EVENT(rxrpc_connect_call,
TP_PROTO(struct rxrpc_call *call),
@@ -1842,6 +1928,8 @@ TRACE_EVENT(rxrpc_call_poked,
#undef EM
#undef E_
+
+#endif /* RXRPC_TRACE_ONLY_DEFINE_ENUMS */
#endif /* _TRACE_RXRPC_H */
/* This part must be outside protection */
diff --git a/include/trace/stages/stage4_event_fields.h b/include/trace/stages/stage4_event_fields.h
index affd541fd25e..b6f679ae21aa 100644
--- a/include/trace/stages/stage4_event_fields.h
+++ b/include/trace/stages/stage4_event_fields.h
@@ -26,7 +26,8 @@
#define __array(_type, _item, _len) { \
.type = #_type"["__stringify(_len)"]", .name = #_item, \
.size = sizeof(_type[_len]), .align = ALIGN_STRUCTFIELD(_type), \
- .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER },
+ .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER,\
+ .len = _len },
#undef __dynamic_array
#define __dynamic_array(_type, _item, _len) { \
diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h
index 0512fde5e697..7b158fcb02b4 100644
--- a/include/uapi/drm/virtgpu_drm.h
+++ b/include/uapi/drm/virtgpu_drm.h
@@ -64,6 +64,7 @@ struct drm_virtgpu_map {
__u32 pad;
};
+/* fence_fd is modified on success if VIRTGPU_EXECBUF_FENCE_FD_OUT flag is set. */
struct drm_virtgpu_execbuffer {
__u32 flags;
__u32 size;
diff --git a/include/uapi/linux/atmbr2684.h b/include/uapi/linux/atmbr2684.h
index a9e2250cd720..d47c47d06f11 100644
--- a/include/uapi/linux/atmbr2684.h
+++ b/include/uapi/linux/atmbr2684.h
@@ -38,7 +38,7 @@
*/
#define BR2684_ENCAPS_VC (0) /* VC-mux */
#define BR2684_ENCAPS_LLC (1)
-#define BR2684_ENCAPS_AUTODETECT (2) /* Unsuported */
+#define BR2684_ENCAPS_AUTODETECT (2) /* Unsupported */
/*
* Is this VC bridged or routed?
diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
index 436258214bb0..cd14c94e9a1e 100644
--- a/include/uapi/linux/fanotify.h
+++ b/include/uapi/linux/fanotify.h
@@ -188,15 +188,43 @@ struct fanotify_event_info_error {
__u32 error_count;
};
+/*
+ * User space may need to record additional information about its decision.
+ * The extra information type records what kind of information is included.
+ * The default is none. We also define an extra information buffer whose
+ * size is determined by the extra information type.
+ *
+ * If the information type is Audit Rule, then the information following
+ * is the rule number that triggered the user space decision that
+ * requires auditing.
+ */
+
+#define FAN_RESPONSE_INFO_NONE 0
+#define FAN_RESPONSE_INFO_AUDIT_RULE 1
+
struct fanotify_response {
__s32 fd;
__u32 response;
};
+struct fanotify_response_info_header {
+ __u8 type;
+ __u8 pad;
+ __u16 len;
+};
+
+struct fanotify_response_info_audit_rule {
+ struct fanotify_response_info_header hdr;
+ __u32 rule_number;
+ __u32 subj_trust;
+ __u32 obj_trust;
+};
+
/* Legit userspace responses to a _PERM event */
#define FAN_ALLOW 0x01
#define FAN_DENY 0x02
-#define FAN_AUDIT 0x10 /* Bit mask to create audit record for result */
+#define FAN_AUDIT 0x10 /* Bitmask to create audit record for result */
+#define FAN_INFO 0x20 /* Bitmask to indicate additional information */
/* No fd set in event */
#define FAN_NOFD -1
diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h
index 874a92349bf5..283dec7e3645 100644
--- a/include/uapi/linux/ip.h
+++ b/include/uapi/linux/ip.h
@@ -18,6 +18,7 @@
#ifndef _UAPI_LINUX_IP_H
#define _UAPI_LINUX_IP_H
#include <linux/types.h>
+#include <linux/stddef.h>
#include <asm/byteorder.h>
#define IPTOS_TOS_MASK 0x1E
diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
index 81f4243bebb1..53326dfc59ec 100644
--- a/include/uapi/linux/ipv6.h
+++ b/include/uapi/linux/ipv6.h
@@ -4,6 +4,7 @@
#include <linux/libc-compat.h>
#include <linux/types.h>
+#include <linux/stddef.h>
#include <linux/in6.h>
#include <asm/byteorder.h>
diff --git a/include/uapi/linux/netfilter/nf_conntrack_sctp.h b/include/uapi/linux/netfilter/nf_conntrack_sctp.h
index c742469afe21..2d6f80d75ae7 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_sctp.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_sctp.h
@@ -15,8 +15,7 @@ enum sctp_conntrack {
SCTP_CONNTRACK_SHUTDOWN_RECD,
SCTP_CONNTRACK_SHUTDOWN_ACK_SENT,
SCTP_CONNTRACK_HEARTBEAT_SENT,
- SCTP_CONNTRACK_HEARTBEAT_ACKED,
- SCTP_CONNTRACK_DATA_SENT,
+ SCTP_CONNTRACK_HEARTBEAT_ACKED, /* no longer used */
SCTP_CONNTRACK_MAX
};
diff --git a/include/uapi/linux/netfilter/nfnetlink_cttimeout.h b/include/uapi/linux/netfilter/nfnetlink_cttimeout.h
index 94e74034706d..aa805e6d4e28 100644
--- a/include/uapi/linux/netfilter/nfnetlink_cttimeout.h
+++ b/include/uapi/linux/netfilter/nfnetlink_cttimeout.h
@@ -94,8 +94,7 @@ enum ctattr_timeout_sctp {
CTA_TIMEOUT_SCTP_SHUTDOWN_RECD,
CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT,
CTA_TIMEOUT_SCTP_HEARTBEAT_SENT,
- CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED,
- CTA_TIMEOUT_SCTP_DATA_SENT,
+ CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED, /* no longer used */
__CTA_TIMEOUT_SCTP_MAX
};
#define CTA_TIMEOUT_SCTP_MAX (__CTA_TIMEOUT_SCTP_MAX - 1)
diff --git a/include/uapi/linux/pktcdvd.h b/include/uapi/linux/pktcdvd.h
new file mode 100644
index 000000000000..9cbb55d21c94
--- /dev/null
+++ b/include/uapi/linux/pktcdvd.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2000 Jens Axboe <axboe@suse.de>
+ * Copyright (C) 2001-2004 Peter Osterlund <petero2@telia.com>
+ *
+ * May be copied or modified under the terms of the GNU General Public
+ * License. See linux/COPYING for more information.
+ *
+ * Packet writing layer for ATAPI and SCSI CD-R, CD-RW, DVD-R, and
+ * DVD-RW devices.
+ *
+ */
+#ifndef _UAPI__PKTCDVD_H
+#define _UAPI__PKTCDVD_H
+
+#include <linux/types.h>
+
+/*
+ * 1 for normal debug messages, 2 is very verbose. 0 to turn it off.
+ */
+#define PACKET_DEBUG 1
+
+#define MAX_WRITERS 8
+
+#define PKT_RB_POOL_SIZE 512
+
+/*
+ * How long we should hold a non-full packet before starting data gathering.
+ */
+#define PACKET_WAIT_TIME (HZ * 5 / 1000)
+
+/*
+ * use drive write caching -- we need deferred error handling to be
+ * able to successfully recover with this option (drive will return good
+ * status as soon as the cdb is validated).
+ */
+#if defined(CONFIG_CDROM_PKTCDVD_WCACHE)
+#define USE_WCACHING 1
+#else
+#define USE_WCACHING 0
+#endif
+
+/*
+ * No user-servicable parts beyond this point ->
+ */
+
+/*
+ * device types
+ */
+#define PACKET_CDR 1
+#define PACKET_CDRW 2
+#define PACKET_DVDR 3
+#define PACKET_DVDRW 4
+
+/*
+ * flags
+ */
+#define PACKET_WRITABLE 1 /* pd is writable */
+#define PACKET_NWA_VALID 2 /* next writable address valid */
+#define PACKET_LRA_VALID 3 /* last recorded address valid */
+#define PACKET_MERGE_SEGS 4 /* perform segment merging to keep */
+ /* underlying cdrom device happy */
+
+/*
+ * Disc status -- from READ_DISC_INFO
+ */
+#define PACKET_DISC_EMPTY 0
+#define PACKET_DISC_INCOMPLETE 1
+#define PACKET_DISC_COMPLETE 2
+#define PACKET_DISC_OTHER 3
+
+/*
+ * write type, and corresponding data block type
+ */
+#define PACKET_MODE1 1
+#define PACKET_MODE2 2
+#define PACKET_BLOCK_MODE1 8
+#define PACKET_BLOCK_MODE2 10
+
+/*
+ * Last session/border status
+ */
+#define PACKET_SESSION_EMPTY 0
+#define PACKET_SESSION_INCOMPLETE 1
+#define PACKET_SESSION_RESERVED 2
+#define PACKET_SESSION_COMPLETE 3
+
+#define PACKET_MCN "4a656e734178626f65323030300000"
+
+#undef PACKET_USE_LS
+
+#define PKT_CTRL_CMD_SETUP 0
+#define PKT_CTRL_CMD_TEARDOWN 1
+#define PKT_CTRL_CMD_STATUS 2
+
+struct pkt_ctrl_command {
+ __u32 command; /* in: Setup, teardown, status */
+ __u32 dev_index; /* in/out: Device index */
+ __u32 dev; /* in/out: Device nr for cdrw device */
+ __u32 pkt_dev; /* in/out: Device nr for packet device */
+ __u32 num_devices; /* out: Largest device index + 1 */
+ __u32 padding; /* Not used */
+};
+
+/*
+ * packet ioctls
+ */
+#define PACKET_IOCTL_MAGIC ('X')
+#define PACKET_CTRL_CMD _IOWR(PACKET_IOCTL_MAGIC, 1, struct pkt_ctrl_command)
+
+
+#endif /* _UAPI__PKTCDVD_H */
diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h
index 3511095c2702..42a40ad3fb62 100644
--- a/include/uapi/linux/psci.h
+++ b/include/uapi/linux/psci.h
@@ -58,7 +58,7 @@
#define PSCI_1_1_FN_SYSTEM_RESET2 PSCI_0_2_FN(18)
#define PSCI_1_1_FN_MEM_PROTECT PSCI_0_2_FN(19)
-#define PSCI_1_1_FN_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN(19)
+#define PSCI_1_1_FN_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN(20)
#define PSCI_1_0_FN64_CPU_DEFAULT_SUSPEND PSCI_0_2_FN64(12)
#define PSCI_1_0_FN64_NODE_HW_STATE PSCI_0_2_FN64(13)
@@ -67,7 +67,7 @@
#define PSCI_1_0_FN64_STAT_COUNT PSCI_0_2_FN64(17)
#define PSCI_1_1_FN64_SYSTEM_RESET2 PSCI_0_2_FN64(18)
-#define PSCI_1_1_FN64_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN64(19)
+#define PSCI_1_1_FN64_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN64(20)
/* PSCI v0.2 power state encoding for CPU_SUSPEND function */
#define PSCI_0_2_POWER_STATE_ID_MASK 0xffff
diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h
index 9bd79235c875..54b649ab0f22 100644
--- a/include/uapi/linux/vdpa.h
+++ b/include/uapi/linux/vdpa.h
@@ -53,11 +53,9 @@ enum vdpa_attr {
VDPA_ATTR_DEV_VENDOR_ATTR_NAME, /* string */
VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, /* u64 */
+ /* virtio features that are provisioned to the vDPA device */
VDPA_ATTR_DEV_FEATURES, /* u64 */
- /* virtio features that are supported by the vDPA device */
- VDPA_ATTR_VDPA_DEV_SUPPORTED_FEATURES, /* u64 */
-
/* new attributes must be added above here */
VDPA_ATTR_MAX,
};
diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index 5cf81dff60aa..727084cd79be 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -808,6 +808,7 @@ struct ufs_hba_monitor {
* @urgent_bkops_lvl: keeps track of urgent bkops level for device
* @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level for
* device is known or not.
+ * @wb_mutex: used to serialize devfreq and sysfs write booster toggling
* @clk_scaling_lock: used to serialize device commands and clock scaling
* @desc_size: descriptor sizes reported by device
* @scsi_block_reqs_cnt: reference counting for scsi block requests
@@ -951,6 +952,7 @@ struct ufs_hba {
enum bkops_status urgent_bkops_lvl;
bool is_urgent_bkops_lvl_checked;
+ struct mutex wb_mutex;
struct rw_semaphore clk_scaling_lock;
unsigned char desc_size[QUERY_DESC_IDN_MAX];
atomic_t scsi_block_reqs_cnt;
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index eaa932b99d8a..ad4fb4eab753 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -117,7 +117,7 @@ struct xenbus_driver {
const struct xenbus_device_id *id);
void (*otherend_changed)(struct xenbus_device *dev,
enum xenbus_state backend_state);
- int (*remove)(struct xenbus_device *dev);
+ void (*remove)(struct xenbus_device *dev);
int (*suspend)(struct xenbus_device *dev);
int (*resume)(struct xenbus_device *dev);
int (*uevent)(struct xenbus_device *, struct kobj_uevent_env *);
diff --git a/init/Kconfig b/init/Kconfig
index 7e5c3ddc341d..44e90b28a30f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -204,7 +204,7 @@ config LOCALVERSION_AUTO
appended after any matching localversion* files, and after the value
set in CONFIG_LOCALVERSION.
- (The actual string used here is the first eight characters produced
+ (The actual string used here is the first 12 characters produced
by running the command:
$ git rev-parse --verify HEAD
@@ -776,7 +776,7 @@ config PRINTK_SAFE_LOG_BUF_SHIFT
depends on PRINTK
help
Select the size of an alternate printk per-CPU buffer where messages
- printed from usafe contexts are temporary stored. One example would
+ printed from unsafe contexts are temporary stored. One example would
be NMI messages, another one - printk recursion. The messages are
copied to the main log buffer in a safe context to avoid a deadlock.
The value defines the size as a power of 2.
@@ -894,13 +894,17 @@ config CC_IMPLICIT_FALLTHROUGH
default "-Wimplicit-fallthrough=5" if CC_IS_GCC && $(cc-option,-Wimplicit-fallthrough=5)
default "-Wimplicit-fallthrough" if CC_IS_CLANG && $(cc-option,-Wunreachable-code-fallthrough)
-# Currently, disable gcc-12 array-bounds globally.
+# Currently, disable gcc-11,12 array-bounds globally.
# We may want to target only particular configurations some day.
+config GCC11_NO_ARRAY_BOUNDS
+ def_bool y
+
config GCC12_NO_ARRAY_BOUNDS
def_bool y
config CC_NO_ARRAY_BOUNDS
bool
+ default y if CC_IS_GCC && GCC_VERSION >= 110000 && GCC_VERSION < 120000 && GCC11_NO_ARRAY_BOUNDS
default y if CC_IS_GCC && GCC_VERSION >= 120000 && GCC_VERSION < 130000 && GCC12_NO_ARRAY_BOUNDS
#
diff --git a/init/Makefile b/init/Makefile
index 8316c23bead2..26de459006c4 100644
--- a/init/Makefile
+++ b/init/Makefile
@@ -59,3 +59,4 @@ include/generated/utsversion.h: FORCE
$(obj)/version-timestamp.o: include/generated/utsversion.h
CFLAGS_version-timestamp.o := -include include/generated/utsversion.h
+KASAN_SANITIZE_version-timestamp.o := n
diff --git a/init/version-timestamp.c b/init/version-timestamp.c
index 179e93bae539..043cbf80a766 100644
--- a/init/version-timestamp.c
+++ b/init/version-timestamp.c
@@ -2,7 +2,6 @@
#include <generated/compile.h>
#include <generated/utsrelease.h>
-#include <linux/version.h>
#include <linux/proc_ns.h>
#include <linux/refcount.h>
#include <linux/uts.h>
diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c
index 2e04850a657b..882bd56b01ed 100644
--- a/io_uring/fdinfo.c
+++ b/io_uring/fdinfo.c
@@ -170,12 +170,11 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx,
xa_for_each(&ctx->personalities, index, cred)
io_uring_show_cred(m, index, cred);
}
- if (has_lock)
- mutex_unlock(&ctx->uring_lock);
seq_puts(m, "PollList:\n");
for (i = 0; i < (1U << ctx->cancel_table.hash_bits); i++) {
struct io_hash_bucket *hb = &ctx->cancel_table.hbs[i];
+ struct io_hash_bucket *hbl = &ctx->cancel_table_locked.hbs[i];
struct io_kiocb *req;
spin_lock(&hb->lock);
@@ -183,8 +182,17 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx,
seq_printf(m, " op=%d, task_works=%d\n", req->opcode,
task_work_pending(req->task));
spin_unlock(&hb->lock);
+
+ if (!has_lock)
+ continue;
+ hlist_for_each_entry(req, &hbl->list, hash_node)
+ seq_printf(m, " op=%d, task_works=%d\n", req->opcode,
+ task_work_pending(req->task));
}
+ if (has_lock)
+ mutex_unlock(&ctx->uring_lock);
+
seq_puts(m, "CqOverflowList:\n");
spin_lock(&ctx->completion_lock);
list_for_each_entry(ocqe, &ctx->cq_overflow_list, list) {
diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
index 6f1d0e5df23a..411bb2d1acd4 100644
--- a/io_uring/io-wq.c
+++ b/io_uring/io-wq.c
@@ -1230,6 +1230,12 @@ static void io_wq_cancel_tw_create(struct io_wq *wq)
worker = container_of(cb, struct io_worker, create_work);
io_worker_cancel_cb(worker);
+ /*
+ * Only the worker continuation helper has worker allocated and
+ * hence needs freeing.
+ */
+ if (cb->func == create_worker_cont)
+ kfree(worker);
}
}
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 58ac13b69dc8..db623b3185c8 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -731,6 +731,8 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
size_t ocq_size = sizeof(struct io_overflow_cqe);
bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32);
+ lockdep_assert_held(&ctx->completion_lock);
+
if (is_cqe32)
ocq_size += sizeof(struct io_uring_cqe);
@@ -820,9 +822,6 @@ static bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res,
{
struct io_uring_cqe *cqe;
- if (!ctx->task_complete)
- lockdep_assert_held(&ctx->completion_lock);
-
ctx->cq_extra++;
/*
@@ -1236,13 +1235,18 @@ static void io_req_local_work_add(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
- if (!llist_add(&req->io_task_work.node, &ctx->work_llist))
+ percpu_ref_get(&ctx->refs);
+
+ if (!llist_add(&req->io_task_work.node, &ctx->work_llist)) {
+ percpu_ref_put(&ctx->refs);
return;
+ }
/* need it for the following io_cqring_wake() */
smp_mb__after_atomic();
if (unlikely(atomic_read(&req->task->io_uring->in_idle))) {
io_move_task_work_from_local(ctx);
+ percpu_ref_put(&ctx->refs);
return;
}
@@ -1252,6 +1256,7 @@ static void io_req_local_work_add(struct io_kiocb *req)
if (ctx->has_evfd)
io_eventfd_signal(ctx);
__io_cqring_wake(ctx);
+ percpu_ref_put(&ctx->refs);
}
void __io_req_task_work_add(struct io_kiocb *req, bool allow_local)
@@ -1760,17 +1765,12 @@ queue:
}
spin_unlock(&ctx->completion_lock);
- ret = io_req_prep_async(req);
- if (ret) {
-fail:
- io_req_defer_failed(req, ret);
- return;
- }
io_prep_async_link(req);
de = kmalloc(sizeof(*de), GFP_KERNEL);
if (!de) {
ret = -ENOMEM;
- goto fail;
+ io_req_defer_failed(req, ret);
+ return;
}
spin_lock(&ctx->completion_lock);
@@ -2043,13 +2043,16 @@ static void io_queue_sqe_fallback(struct io_kiocb *req)
req->flags &= ~REQ_F_HARDLINK;
req->flags |= REQ_F_LINK;
io_req_defer_failed(req, req->cqe.res);
- } else if (unlikely(req->ctx->drain_active)) {
- io_drain_req(req);
} else {
int ret = io_req_prep_async(req);
- if (unlikely(ret))
+ if (unlikely(ret)) {
io_req_defer_failed(req, ret);
+ return;
+ }
+
+ if (unlikely(req->ctx->drain_active))
+ io_drain_req(req);
else
io_queue_iowq(req, NULL);
}
@@ -2465,7 +2468,7 @@ int io_run_task_work_sig(struct io_ring_ctx *ctx)
/* when returns >0, the caller should retry */
static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
struct io_wait_queue *iowq,
- ktime_t timeout)
+ ktime_t *timeout)
{
int ret;
unsigned long check_cq;
@@ -2483,7 +2486,7 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
if (check_cq & BIT(IO_CHECK_CQ_DROPPED_BIT))
return -EBADR;
}
- if (!schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS))
+ if (!schedule_hrtimeout(timeout, HRTIMER_MODE_ABS))
return -ETIME;
/*
@@ -2559,7 +2562,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
}
prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq,
TASK_INTERRUPTIBLE);
- ret = io_cqring_wait_schedule(ctx, &iowq, timeout);
+ ret = io_cqring_wait_schedule(ctx, &iowq, &timeout);
if (__io_cqring_events_user(ctx) >= min_events)
break;
cond_resched();
@@ -3669,7 +3672,7 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
if (ctx->flags & IORING_SETUP_SINGLE_ISSUER
&& !(ctx->flags & IORING_SETUP_R_DISABLED))
- ctx->submitter_task = get_task_struct(current);
+ WRITE_ONCE(ctx->submitter_task, get_task_struct(current));
file = io_uring_get_file(ctx);
if (IS_ERR(file)) {
@@ -3863,7 +3866,7 @@ static int io_register_enable_rings(struct io_ring_ctx *ctx)
return -EBADFD;
if (ctx->flags & IORING_SETUP_SINGLE_ISSUER && !ctx->submitter_task)
- ctx->submitter_task = get_task_struct(current);
+ WRITE_ONCE(ctx->submitter_task, get_task_struct(current));
if (ctx->restrictions.registered)
ctx->restricted = 1;
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index e9f0d41ebb99..ab4b2a1c3b7e 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -79,6 +79,19 @@ bool __io_alloc_req_refill(struct io_ring_ctx *ctx);
bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
bool cancel_all);
+#define io_lockdep_assert_cq_locked(ctx) \
+ do { \
+ if (ctx->flags & IORING_SETUP_IOPOLL) { \
+ lockdep_assert_held(&ctx->uring_lock); \
+ } else if (!ctx->task_complete) { \
+ lockdep_assert_held(&ctx->completion_lock); \
+ } else if (ctx->submitter_task->flags & PF_EXITING) { \
+ lockdep_assert(current_work()); \
+ } else { \
+ lockdep_assert(current == ctx->submitter_task); \
+ } \
+ } while (0)
+
static inline void io_req_task_work_add(struct io_kiocb *req)
{
__io_req_task_work_add(req, true);
@@ -92,6 +105,8 @@ void io_cq_unlock_post(struct io_ring_ctx *ctx);
static inline struct io_uring_cqe *io_get_cqe_overflow(struct io_ring_ctx *ctx,
bool overflow)
{
+ io_lockdep_assert_cq_locked(ctx);
+
if (likely(ctx->cqe_cached < ctx->cqe_sentinel)) {
struct io_uring_cqe *cqe = ctx->cqe_cached;
diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index 2d3cd945a531..15602a136821 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -25,6 +25,28 @@ struct io_msg {
u32 flags;
};
+static void io_double_unlock_ctx(struct io_ring_ctx *octx)
+{
+ mutex_unlock(&octx->uring_lock);
+}
+
+static int io_double_lock_ctx(struct io_ring_ctx *octx,
+ unsigned int issue_flags)
+{
+ /*
+ * To ensure proper ordering between the two ctxs, we can only
+ * attempt a trylock on the target. If that fails and we already have
+ * the source ctx lock, punt to io-wq.
+ */
+ if (!(issue_flags & IO_URING_F_UNLOCKED)) {
+ if (!mutex_trylock(&octx->uring_lock))
+ return -EAGAIN;
+ return 0;
+ }
+ mutex_lock(&octx->uring_lock);
+ return 0;
+}
+
void io_msg_ring_cleanup(struct io_kiocb *req)
{
struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
@@ -36,6 +58,29 @@ void io_msg_ring_cleanup(struct io_kiocb *req)
msg->src_file = NULL;
}
+static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx)
+{
+ if (!target_ctx->task_complete)
+ return false;
+ return current != target_ctx->submitter_task;
+}
+
+static int io_msg_exec_remote(struct io_kiocb *req, task_work_func_t func)
+{
+ struct io_ring_ctx *ctx = req->file->private_data;
+ struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
+ struct task_struct *task = READ_ONCE(ctx->submitter_task);
+
+ if (unlikely(!task))
+ return -EOWNERDEAD;
+
+ init_task_work(&msg->tw, func);
+ if (task_work_add(ctx->submitter_task, &msg->tw, TWA_SIGNAL))
+ return -EOWNERDEAD;
+
+ return IOU_ISSUE_SKIP_COMPLETE;
+}
+
static void io_msg_tw_complete(struct callback_head *head)
{
struct io_msg *msg = container_of(head, struct io_msg, tw);
@@ -43,61 +88,54 @@ static void io_msg_tw_complete(struct callback_head *head)
struct io_ring_ctx *target_ctx = req->file->private_data;
int ret = 0;
- if (current->flags & PF_EXITING)
+ if (current->flags & PF_EXITING) {
ret = -EOWNERDEAD;
- else if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
- ret = -EOVERFLOW;
+ } else {
+ /*
+ * If the target ring is using IOPOLL mode, then we need to be
+ * holding the uring_lock for posting completions. Other ring
+ * types rely on the regular completion locking, which is
+ * handled while posting.
+ */
+ if (target_ctx->flags & IORING_SETUP_IOPOLL)
+ mutex_lock(&target_ctx->uring_lock);
+ if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
+ ret = -EOVERFLOW;
+ if (target_ctx->flags & IORING_SETUP_IOPOLL)
+ mutex_unlock(&target_ctx->uring_lock);
+ }
if (ret < 0)
req_set_fail(req);
io_req_queue_tw_complete(req, ret);
}
-static int io_msg_ring_data(struct io_kiocb *req)
+static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_ring_ctx *target_ctx = req->file->private_data;
struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
+ int ret;
if (msg->src_fd || msg->dst_fd || msg->flags)
return -EINVAL;
+ if (target_ctx->flags & IORING_SETUP_R_DISABLED)
+ return -EBADFD;
- if (target_ctx->task_complete && current != target_ctx->submitter_task) {
- init_task_work(&msg->tw, io_msg_tw_complete);
- if (task_work_add(target_ctx->submitter_task, &msg->tw,
- TWA_SIGNAL_NO_IPI))
- return -EOWNERDEAD;
-
- atomic_or(IORING_SQ_TASKRUN, &target_ctx->rings->sq_flags);
- return IOU_ISSUE_SKIP_COMPLETE;
- }
-
- if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
- return 0;
+ if (io_msg_need_remote(target_ctx))
+ return io_msg_exec_remote(req, io_msg_tw_complete);
- return -EOVERFLOW;
-}
-
-static void io_double_unlock_ctx(struct io_ring_ctx *octx,
- unsigned int issue_flags)
-{
- mutex_unlock(&octx->uring_lock);
-}
-
-static int io_double_lock_ctx(struct io_ring_ctx *octx,
- unsigned int issue_flags)
-{
- /*
- * To ensure proper ordering between the two ctxs, we can only
- * attempt a trylock on the target. If that fails and we already have
- * the source ctx lock, punt to io-wq.
- */
- if (!(issue_flags & IO_URING_F_UNLOCKED)) {
- if (!mutex_trylock(&octx->uring_lock))
+ ret = -EOVERFLOW;
+ if (target_ctx->flags & IORING_SETUP_IOPOLL) {
+ if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
return -EAGAIN;
- return 0;
+ if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
+ ret = 0;
+ io_double_unlock_ctx(target_ctx);
+ } else {
+ if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
+ ret = 0;
}
- mutex_lock(&octx->uring_lock);
- return 0;
+ return ret;
}
static struct file *io_msg_grab_file(struct io_kiocb *req, unsigned int issue_flags)
@@ -148,7 +186,7 @@ static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flag
if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
ret = -EOVERFLOW;
out_unlock:
- io_double_unlock_ctx(target_ctx, issue_flags);
+ io_double_unlock_ctx(target_ctx);
return ret;
}
@@ -174,6 +212,8 @@ static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
if (target_ctx == ctx)
return -EINVAL;
+ if (target_ctx->flags & IORING_SETUP_R_DISABLED)
+ return -EBADFD;
if (!src_file) {
src_file = io_msg_grab_file(req, issue_flags);
if (!src_file)
@@ -182,14 +222,8 @@ static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
req->flags |= REQ_F_NEED_CLEANUP;
}
- if (target_ctx->task_complete && current != target_ctx->submitter_task) {
- init_task_work(&msg->tw, io_msg_tw_fd_complete);
- if (task_work_add(target_ctx->submitter_task, &msg->tw,
- TWA_SIGNAL))
- return -EOWNERDEAD;
-
- return IOU_ISSUE_SKIP_COMPLETE;
- }
+ if (io_msg_need_remote(target_ctx))
+ return io_msg_exec_remote(req, io_msg_tw_fd_complete);
return io_msg_install_complete(req, issue_flags);
}
@@ -224,7 +258,7 @@ int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
switch (msg->cmd) {
case IORING_MSG_DATA:
- ret = io_msg_ring_data(req);
+ ret = io_msg_ring_data(req, issue_flags);
break;
case IORING_MSG_SEND_FD:
ret = io_msg_send_fd(req, issue_flags);
diff --git a/io_uring/net.c b/io_uring/net.c
index fbc34a7c2743..90326b279965 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -62,6 +62,7 @@ struct io_sr_msg {
u16 flags;
/* initialised and used only by !msg send variants */
u16 addr_len;
+ u16 buf_group;
void __user *addr;
/* used only for send zerocopy */
struct io_kiocb *notif;
@@ -580,6 +581,15 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (req->opcode == IORING_OP_RECV && sr->len)
return -EINVAL;
req->flags |= REQ_F_APOLL_MULTISHOT;
+ /*
+ * Store the buffer group for this multishot receive separately,
+ * as if we end up doing an io-wq based issue that selects a
+ * buffer, it has to be committed immediately and that will
+ * clear ->buf_list. This means we lose the link to the buffer
+ * list, and the eventual buffer put on completion then cannot
+ * restore it.
+ */
+ sr->buf_group = req->buf_index;
}
#ifdef CONFIG_COMPAT
@@ -596,6 +606,7 @@ static inline void io_recv_prep_retry(struct io_kiocb *req)
sr->done_io = 0;
sr->len = 0; /* get from the provided buffer */
+ req->buf_index = sr->buf_group;
}
/*
diff --git a/io_uring/poll.c b/io_uring/poll.c
index ee7da6150ec4..2ac1366adbd7 100644
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -223,21 +223,22 @@ enum {
IOU_POLL_DONE = 0,
IOU_POLL_NO_ACTION = 1,
IOU_POLL_REMOVE_POLL_USE_RES = 2,
+ IOU_POLL_REISSUE = 3,
};
/*
* All poll tw should go through this. Checks for poll events, manages
* references, does rewait, etc.
*
- * Returns a negative error on failure. IOU_POLL_NO_ACTION when no action require,
- * which is either spurious wakeup or multishot CQE is served.
- * IOU_POLL_DONE when it's done with the request, then the mask is stored in req->cqe.res.
- * IOU_POLL_REMOVE_POLL_USE_RES indicates to remove multishot poll and that the result
- * is stored in req->cqe.
+ * Returns a negative error on failure. IOU_POLL_NO_ACTION when no action
+ * require, which is either spurious wakeup or multishot CQE is served.
+ * IOU_POLL_DONE when it's done with the request, then the mask is stored in
+ * req->cqe.res. IOU_POLL_REMOVE_POLL_USE_RES indicates to remove multishot
+ * poll and that the result is stored in req->cqe.
*/
static int io_poll_check_events(struct io_kiocb *req, bool *locked)
{
- int v, ret;
+ int v;
/* req->task == current here, checking PF_EXITING is safe */
if (unlikely(req->task->flags & PF_EXITING))
@@ -276,10 +277,19 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked)
if (!req->cqe.res) {
struct poll_table_struct pt = { ._key = req->apoll_events };
req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events;
+ /*
+ * We got woken with a mask, but someone else got to
+ * it first. The above vfs_poll() doesn't add us back
+ * to the waitqueue, so if we get nothing back, we
+ * should be safe and attempt a reissue.
+ */
+ if (unlikely(!req->cqe.res)) {
+ /* Multishot armed need not reissue */
+ if (!(req->apoll_events & EPOLLONESHOT))
+ continue;
+ return IOU_POLL_REISSUE;
+ }
}
-
- if ((unlikely(!req->cqe.res)))
- continue;
if (req->apoll_events & EPOLLONESHOT)
return IOU_POLL_DONE;
@@ -294,7 +304,7 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked)
return IOU_POLL_REMOVE_POLL_USE_RES;
}
} else {
- ret = io_poll_issue(req, locked);
+ int ret = io_poll_issue(req, locked);
if (ret == IOU_STOP_MULTISHOT)
return IOU_POLL_REMOVE_POLL_USE_RES;
if (ret < 0)
@@ -330,6 +340,9 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
poll = io_kiocb_to_cmd(req, struct io_poll);
req->cqe.res = mangle_poll(req->cqe.res & poll->events);
+ } else if (ret == IOU_POLL_REISSUE) {
+ io_req_task_submit(req, locked);
+ return;
} else if (ret != IOU_POLL_REMOVE_POLL_USE_RES) {
req->cqe.res = ret;
req_set_fail(req);
@@ -342,7 +355,7 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
if (ret == IOU_POLL_REMOVE_POLL_USE_RES)
io_req_task_complete(req, locked);
- else if (ret == IOU_POLL_DONE)
+ else if (ret == IOU_POLL_DONE || ret == IOU_POLL_REISSUE)
io_req_task_submit(req, locked);
else
io_req_defer_failed(req, ret);
@@ -533,6 +546,14 @@ static bool io_poll_can_finish_inline(struct io_kiocb *req,
return pt->owning || io_poll_get_ownership(req);
}
+static void io_poll_add_hash(struct io_kiocb *req)
+{
+ if (req->flags & REQ_F_HASH_LOCKED)
+ io_poll_req_insert_locked(req);
+ else
+ io_poll_req_insert(req);
+}
+
/*
* Returns 0 when it's handed over for polling. The caller owns the requests if
* it returns non-zero, but otherwise should not touch it. Negative values
@@ -591,18 +612,17 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
if (mask &&
((poll->events & (EPOLLET|EPOLLONESHOT)) == (EPOLLET|EPOLLONESHOT))) {
- if (!io_poll_can_finish_inline(req, ipt))
+ if (!io_poll_can_finish_inline(req, ipt)) {
+ io_poll_add_hash(req);
return 0;
+ }
io_poll_remove_entries(req);
ipt->result_mask = mask;
/* no one else has access to the req, forget about the ref */
return 1;
}
- if (req->flags & REQ_F_HASH_LOCKED)
- io_poll_req_insert_locked(req);
- else
- io_poll_req_insert(req);
+ io_poll_add_hash(req);
if (mask && (poll->events & EPOLLET) &&
io_poll_can_finish_inline(req, ipt)) {
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 8227af2e1c0f..9c3ddd46a1ad 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -1062,7 +1062,11 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
continue;
req->cqe.flags = io_put_kbuf(req, 0);
- io_fill_cqe_req(req->ctx, req);
+ if (unlikely(!__io_fill_cqe_req(ctx, req))) {
+ spin_lock(&ctx->completion_lock);
+ io_req_cqe_overflow(req);
+ spin_unlock(&ctx->completion_lock);
+ }
}
if (unlikely(!nr_events))
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index d09aa1c1e3e6..0160e9f2b07c 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -608,7 +608,7 @@ out_unlock:
return error;
}
-static int mqueue_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int mqueue_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
return mqueue_create_attr(dentry, mode, NULL);
@@ -887,7 +887,7 @@ static int prepare_open(struct dentry *dentry, int oflag, int ro,
if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY))
return -EINVAL;
acc = oflag2acc[oflag & O_ACCMODE];
- return inode_permission(&init_user_ns, d_inode(dentry), acc);
+ return inode_permission(&nop_mnt_idmap, d_inode(dentry), acc);
}
static int do_mq_open(const char __user *u_name, int oflag, umode_t mode,
@@ -979,7 +979,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
err = -ENOENT;
} else {
ihold(inode);
- err = vfs_unlink(&init_user_ns, d_inode(dentry->d_parent),
+ err = vfs_unlink(&nop_mnt_idmap, d_inode(dentry->d_parent),
dentry, NULL);
}
dput(dentry);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 547c88be8a28..93d0b87f3283 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -64,6 +64,7 @@
#include <uapi/linux/limits.h>
#include <uapi/linux/netfilter/nf_tables.h>
#include <uapi/linux/openat2.h> // struct open_how
+#include <uapi/linux/fanotify.h>
#include "audit.h"
@@ -2252,7 +2253,7 @@ static inline int audit_copy_fcaps(struct audit_names *name,
if (!dentry)
return 0;
- rc = get_vfs_caps_from_disk(&init_user_ns, dentry, &caps);
+ rc = get_vfs_caps_from_disk(&nop_mnt_idmap, dentry, &caps);
if (rc)
return rc;
@@ -2807,7 +2808,7 @@ int __audit_log_bprm_fcaps(struct linux_binprm *bprm,
ax->d.next = context->aux;
context->aux = (void *)ax;
- get_vfs_caps_from_disk(&init_user_ns,
+ get_vfs_caps_from_disk(&nop_mnt_idmap,
bprm->file->f_path.dentry, &vcaps);
ax->fcap.permitted = vcaps.permitted;
@@ -2877,10 +2878,21 @@ void __audit_log_kern_module(char *name)
context->type = AUDIT_KERN_MODULE;
}
-void __audit_fanotify(unsigned int response)
+void __audit_fanotify(u32 response, struct fanotify_response_info_audit_rule *friar)
{
- audit_log(audit_context(), GFP_KERNEL,
- AUDIT_FANOTIFY, "resp=%u", response);
+ /* {subj,obj}_trust values are {0,1,2}: no,yes,unknown */
+ switch (friar->hdr.type) {
+ case FAN_RESPONSE_INFO_NONE:
+ audit_log(audit_context(), GFP_KERNEL, AUDIT_FANOTIFY,
+ "resp=%u fan_type=%u fan_info=0 subj_trust=2 obj_trust=2",
+ response, FAN_RESPONSE_INFO_NONE);
+ break;
+ case FAN_RESPONSE_INFO_AUDIT_RULE:
+ audit_log(audit_context(), GFP_KERNEL, AUDIT_FANOTIFY,
+ "resp=%u fan_type=%u fan_info=%X subj_trust=%u obj_trust=%u",
+ response, friar->hdr.type, friar->rule_number,
+ friar->subj_trust, friar->obj_trust);
+ }
}
void __audit_tk_injoffset(struct timespec64 offset)
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 9ea42a45da47..e14c822f8911 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -51,7 +51,6 @@ BTF_SET_END(bpf_lsm_current_hooks)
*/
BTF_SET_START(bpf_lsm_locked_sockopt_hooks)
#ifdef CONFIG_SECURITY_NETWORK
-BTF_ID(func, bpf_lsm_socket_sock_rcv_skb)
BTF_ID(func, bpf_lsm_sock_graft)
BTF_ID(func, bpf_lsm_inet_csk_clone)
BTF_ID(func, bpf_lsm_inet_conn_established)
@@ -351,8 +350,10 @@ BTF_ID(func, bpf_lsm_bpf_prog_alloc_security)
BTF_ID(func, bpf_lsm_bpf_prog_free_security)
BTF_ID(func, bpf_lsm_file_alloc_security)
BTF_ID(func, bpf_lsm_file_free_security)
+#ifdef CONFIG_SECURITY_NETWORK
BTF_ID(func, bpf_lsm_sk_alloc_security)
BTF_ID(func, bpf_lsm_sk_free_security)
+#endif /* CONFIG_SECURITY_NETWORK */
BTF_ID(func, bpf_lsm_task_free)
BTF_SET_END(untrusted_lsm_hooks)
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index f7dd8af06413..b7017cae6fd1 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -7782,9 +7782,9 @@ int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_c
sort(tab->dtors, tab->cnt, sizeof(tab->dtors[0]), btf_id_cmp_func, NULL);
- return 0;
end:
- btf_free_dtor_kfunc_tab(btf);
+ if (ret)
+ btf_free_dtor_kfunc_tab(btf);
btf_put(btf);
return ret;
}
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 5aa2b5525f79..66bded144377 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -152,7 +152,7 @@ static inline int htab_lock_bucket(const struct bpf_htab *htab,
{
unsigned long flags;
- hash = hash & HASHTAB_MAP_LOCK_MASK;
+ hash = hash & min_t(u32, HASHTAB_MAP_LOCK_MASK, htab->n_buckets - 1);
preempt_disable();
if (unlikely(__this_cpu_inc_return(*(htab->map_locked[hash])) != 1)) {
@@ -171,7 +171,7 @@ static inline void htab_unlock_bucket(const struct bpf_htab *htab,
struct bucket *b, u32 hash,
unsigned long flags)
{
- hash = hash & HASHTAB_MAP_LOCK_MASK;
+ hash = hash & min_t(u32, HASHTAB_MAP_LOCK_MASK, htab->n_buckets - 1);
raw_spin_unlock_irqrestore(&b->raw_lock, flags);
__this_cpu_dec(*(htab->map_locked[hash]));
preempt_enable();
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 4f841e16779e..9948b542a470 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -122,7 +122,7 @@ static struct inode *bpf_get_inode(struct super_block *sb,
inode->i_mtime = inode->i_atime;
inode->i_ctime = inode->i_atime;
- inode_init_owner(&init_user_ns, inode, dir, mode);
+ inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
return inode;
}
@@ -152,7 +152,7 @@ static void bpf_dentry_finalize(struct dentry *dentry, struct inode *inode,
dir->i_ctime = dir->i_mtime;
}
-static int bpf_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int bpf_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
struct inode *inode;
@@ -382,7 +382,7 @@ bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags)
return simple_lookup(dir, dentry, flags);
}
-static int bpf_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+static int bpf_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *target)
{
char *link = kstrdup(target, GFP_USER | __GFP_NOWARN);
@@ -559,7 +559,7 @@ int bpf_obj_get_user(const char __user *pathname, int flags)
static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type)
{
struct bpf_prog *prog;
- int ret = inode_permission(&init_user_ns, inode, MAY_READ);
+ int ret = inode_permission(&nop_mnt_idmap, inode, MAY_READ);
if (ret)
return ERR_PTR(ret);
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index ebcc3dd0fa19..1db156405b68 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -71,7 +71,7 @@ static int bpf_mem_cache_idx(size_t size)
if (size <= 192)
return size_index[(size - 1) / 8] - 1;
- return fls(size - 1) - 1;
+ return fls(size - 1) - 2;
}
#define NUM_CACHES 11
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 13e4efc971e6..190d9f9dc987 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -216,9 +216,6 @@ static void __bpf_prog_offload_destroy(struct bpf_prog *prog)
if (offload->dev_state)
offload->offdev->ops->destroy(prog);
- /* Make sure BPF_PROG_GET_NEXT_ID can't find this dead program */
- bpf_prog_free_id(prog, true);
-
list_del_init(&offload->offloads);
kfree(offload);
prog->aux->offload = NULL;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 64131f88c553..ecca9366c7a6 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1972,7 +1972,7 @@ static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op)
return;
if (audit_enabled == AUDIT_OFF)
return;
- if (op == BPF_AUDIT_LOAD)
+ if (!in_irq() && !irqs_disabled())
ctx = audit_context();
ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF);
if (unlikely(!ab))
@@ -2001,7 +2001,7 @@ static int bpf_prog_alloc_id(struct bpf_prog *prog)
return id > 0 ? 0 : id;
}
-void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
+void bpf_prog_free_id(struct bpf_prog *prog)
{
unsigned long flags;
@@ -2013,18 +2013,10 @@ void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
if (!prog->aux->id)
return;
- if (do_idr_lock)
- spin_lock_irqsave(&prog_idr_lock, flags);
- else
- __acquire(&prog_idr_lock);
-
+ spin_lock_irqsave(&prog_idr_lock, flags);
idr_remove(&prog_idr, prog->aux->id);
prog->aux->id = 0;
-
- if (do_idr_lock)
- spin_unlock_irqrestore(&prog_idr_lock, flags);
- else
- __release(&prog_idr_lock);
+ spin_unlock_irqrestore(&prog_idr_lock, flags);
}
static void __bpf_prog_put_rcu(struct rcu_head *rcu)
@@ -2067,17 +2059,15 @@ static void bpf_prog_put_deferred(struct work_struct *work)
prog = aux->prog;
perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
bpf_audit_prog(prog, BPF_AUDIT_UNLOAD);
+ bpf_prog_free_id(prog);
__bpf_prog_put_noref(prog, true);
}
-static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
+static void __bpf_prog_put(struct bpf_prog *prog)
{
struct bpf_prog_aux *aux = prog->aux;
if (atomic64_dec_and_test(&aux->refcnt)) {
- /* bpf_prog_free_id() must be called first */
- bpf_prog_free_id(prog, do_idr_lock);
-
if (in_irq() || irqs_disabled()) {
INIT_WORK(&aux->work, bpf_prog_put_deferred);
schedule_work(&aux->work);
@@ -2089,7 +2079,7 @@ static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
void bpf_prog_put(struct bpf_prog *prog)
{
- __bpf_prog_put(prog, true);
+ __bpf_prog_put(prog);
}
EXPORT_SYMBOL_GPL(bpf_prog_put);
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index c2a2182ce570..c4ab9d6cdbe9 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -438,6 +438,7 @@ struct bpf_iter_seq_task_vma_info {
*/
struct bpf_iter_seq_task_common common;
struct task_struct *task;
+ struct mm_struct *mm;
struct vm_area_struct *vma;
u32 tid;
unsigned long prev_vm_start;
@@ -456,16 +457,19 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
enum bpf_task_vma_iter_find_op op;
struct vm_area_struct *curr_vma;
struct task_struct *curr_task;
+ struct mm_struct *curr_mm;
u32 saved_tid = info->tid;
/* If this function returns a non-NULL vma, it holds a reference to
- * the task_struct, and holds read lock on vma->mm->mmap_lock.
+ * the task_struct, holds a refcount on mm->mm_users, and holds
+ * read lock on vma->mm->mmap_lock.
* If this function returns NULL, it does not hold any reference or
* lock.
*/
if (info->task) {
curr_task = info->task;
curr_vma = info->vma;
+ curr_mm = info->mm;
/* In case of lock contention, drop mmap_lock to unblock
* the writer.
*
@@ -504,13 +508,15 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
* 4.2) VMA2 and VMA2' covers different ranges, process
* VMA2'.
*/
- if (mmap_lock_is_contended(curr_task->mm)) {
+ if (mmap_lock_is_contended(curr_mm)) {
info->prev_vm_start = curr_vma->vm_start;
info->prev_vm_end = curr_vma->vm_end;
op = task_vma_iter_find_vma;
- mmap_read_unlock(curr_task->mm);
- if (mmap_read_lock_killable(curr_task->mm))
+ mmap_read_unlock(curr_mm);
+ if (mmap_read_lock_killable(curr_mm)) {
+ mmput(curr_mm);
goto finish;
+ }
} else {
op = task_vma_iter_next_vma;
}
@@ -535,42 +541,47 @@ again:
op = task_vma_iter_find_vma;
}
- if (!curr_task->mm)
+ curr_mm = get_task_mm(curr_task);
+ if (!curr_mm)
goto next_task;
- if (mmap_read_lock_killable(curr_task->mm))
+ if (mmap_read_lock_killable(curr_mm)) {
+ mmput(curr_mm);
goto finish;
+ }
}
switch (op) {
case task_vma_iter_first_vma:
- curr_vma = find_vma(curr_task->mm, 0);
+ curr_vma = find_vma(curr_mm, 0);
break;
case task_vma_iter_next_vma:
- curr_vma = find_vma(curr_task->mm, curr_vma->vm_end);
+ curr_vma = find_vma(curr_mm, curr_vma->vm_end);
break;
case task_vma_iter_find_vma:
/* We dropped mmap_lock so it is necessary to use find_vma
* to find the next vma. This is similar to the mechanism
* in show_smaps_rollup().
*/
- curr_vma = find_vma(curr_task->mm, info->prev_vm_end - 1);
+ curr_vma = find_vma(curr_mm, info->prev_vm_end - 1);
/* case 1) and 4.2) above just use curr_vma */
/* check for case 2) or case 4.1) above */
if (curr_vma &&
curr_vma->vm_start == info->prev_vm_start &&
curr_vma->vm_end == info->prev_vm_end)
- curr_vma = find_vma(curr_task->mm, curr_vma->vm_end);
+ curr_vma = find_vma(curr_mm, curr_vma->vm_end);
break;
}
if (!curr_vma) {
/* case 3) above, or case 2) 4.1) with vma->next == NULL */
- mmap_read_unlock(curr_task->mm);
+ mmap_read_unlock(curr_mm);
+ mmput(curr_mm);
goto next_task;
}
info->task = curr_task;
info->vma = curr_vma;
+ info->mm = curr_mm;
return curr_vma;
next_task:
@@ -579,6 +590,7 @@ next_task:
put_task_struct(curr_task);
info->task = NULL;
+ info->mm = NULL;
info->tid++;
goto again;
@@ -587,6 +599,7 @@ finish:
put_task_struct(curr_task);
info->task = NULL;
info->vma = NULL;
+ info->mm = NULL;
return NULL;
}
@@ -658,7 +671,9 @@ static void task_vma_seq_stop(struct seq_file *seq, void *v)
*/
info->prev_vm_start = ~0UL;
info->prev_vm_end = info->vma->vm_end;
- mmap_read_unlock(info->task->mm);
+ mmap_read_unlock(info->mm);
+ mmput(info->mm);
+ info->mm = NULL;
put_task_struct(info->task);
info->task = NULL;
}
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 11f5ec0b8016..d0ed7d6f5eec 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -488,6 +488,10 @@ again:
/* reset fops->func and fops->trampoline for re-register */
tr->fops->func = NULL;
tr->fops->trampoline = 0;
+
+ /* reset im->image memory attr for arch_prepare_bpf_trampoline */
+ set_memory_nx((long)im->image, 1);
+ set_memory_rw((long)im->image, 1);
goto again;
}
#endif
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a5255a0dcbb6..7ee218827259 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1054,6 +1054,8 @@ static void print_insn_state(struct bpf_verifier_env *env,
*/
static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
{
+ size_t alloc_bytes;
+ void *orig = dst;
size_t bytes;
if (ZERO_OR_NULL_PTR(src))
@@ -1062,11 +1064,11 @@ static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t
if (unlikely(check_mul_overflow(n, size, &bytes)))
return NULL;
- if (ksize(dst) < ksize(src)) {
- kfree(dst);
- dst = kmalloc_track_caller(kmalloc_size_roundup(bytes), flags);
- if (!dst)
- return NULL;
+ alloc_bytes = max(ksize(orig), kmalloc_size_roundup(bytes));
+ dst = krealloc(orig, alloc_bytes, flags);
+ if (!dst) {
+ kfree(orig);
+ return NULL;
}
memcpy(dst, src, bytes);
@@ -2746,6 +2748,12 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx,
*/
if (insn->src_reg == 0 && is_callback_calling_function(insn->imm))
return -ENOTSUPP;
+ /* kfunc with imm==0 is invalid and fixup_kfunc_call will
+ * catch this error later. Make backtracking conservative
+ * with ENOTSUPP.
+ */
+ if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && insn->imm == 0)
+ return -ENOTSUPP;
/* regular helper call sets R0 */
*reg_mask &= ~1;
if (*reg_mask & 0x3f) {
@@ -3235,13 +3243,24 @@ static bool __is_pointer_value(bool allow_ptr_leaks,
return reg->type != SCALAR_VALUE;
}
+/* Copy src state preserving dst->parent and dst->live fields */
+static void copy_register_state(struct bpf_reg_state *dst, const struct bpf_reg_state *src)
+{
+ struct bpf_reg_state *parent = dst->parent;
+ enum bpf_reg_liveness live = dst->live;
+
+ *dst = *src;
+ dst->parent = parent;
+ dst->live = live;
+}
+
static void save_register_state(struct bpf_func_state *state,
int spi, struct bpf_reg_state *reg,
int size)
{
int i;
- state->stack[spi].spilled_ptr = *reg;
+ copy_register_state(&state->stack[spi].spilled_ptr, reg);
if (size == BPF_REG_SIZE)
state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
@@ -3287,7 +3306,9 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
bool sanitize = reg && is_spillable_regtype(reg->type);
for (i = 0; i < size; i++) {
- if (state->stack[spi].slot_type[i] == STACK_INVALID) {
+ u8 type = state->stack[spi].slot_type[i];
+
+ if (type != STACK_MISC && type != STACK_ZERO) {
sanitize = true;
break;
}
@@ -3567,7 +3588,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
*/
s32 subreg_def = state->regs[dst_regno].subreg_def;
- state->regs[dst_regno] = *reg;
+ copy_register_state(&state->regs[dst_regno], reg);
state->regs[dst_regno].subreg_def = subreg_def;
} else {
for (i = 0; i < size; i++) {
@@ -3588,7 +3609,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
if (dst_regno >= 0) {
/* restore register state from stack */
- state->regs[dst_regno] = *reg;
+ copy_register_state(&state->regs[dst_regno], reg);
/* mark reg as written since spilled pointer state likely
* has its liveness marks cleared by is_state_visited()
* which resets stack/reg liveness for state transitions
@@ -9582,7 +9603,7 @@ do_sim:
*/
if (!ptr_is_dst_reg) {
tmp = *dst_reg;
- *dst_reg = *ptr_reg;
+ copy_register_state(dst_reg, ptr_reg);
}
ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
env->insn_idx);
@@ -10835,7 +10856,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
* to propagate min/max range.
*/
src_reg->id = ++env->id_gen;
- *dst_reg = *src_reg;
+ copy_register_state(dst_reg, src_reg);
dst_reg->live |= REG_LIVE_WRITTEN;
dst_reg->subreg_def = DEF_NOT_SUBREG;
} else {
@@ -10846,7 +10867,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
insn->src_reg);
return -EACCES;
} else if (src_reg->type == SCALAR_VALUE) {
- *dst_reg = *src_reg;
+ copy_register_state(dst_reg, src_reg);
/* Make sure ID is cleared otherwise
* dst_reg min/max could be incorrectly
* propagated into src_reg by find_equal_scalars()
@@ -11645,7 +11666,7 @@ static void find_equal_scalars(struct bpf_verifier_state *vstate,
bpf_for_each_reg_in_vstate(vstate, state, reg, ({
if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
- *reg = *known_reg;
+ copy_register_state(reg, known_reg);
}));
}
@@ -11822,10 +11843,17 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
* register B - not null
* for JNE A, B, ... - A is not null in the false branch;
* for JEQ A, B, ... - A is not null in the true branch.
+ *
+ * Since PTR_TO_BTF_ID points to a kernel struct that does
+ * not need to be null checked by the BPF program, i.e.,
+ * could be null even without PTR_MAYBE_NULL marking, so
+ * only propagate nullness when neither reg is that type.
*/
if (!is_jmp32 && BPF_SRC(insn->code) == BPF_X &&
__is_pointer_value(false, src_reg) && __is_pointer_value(false, dst_reg) &&
- type_may_be_null(src_reg->type) != type_may_be_null(dst_reg->type)) {
+ type_may_be_null(src_reg->type) != type_may_be_null(dst_reg->type) &&
+ base_type(src_reg->type) != PTR_TO_BTF_ID &&
+ base_type(dst_reg->type) != PTR_TO_BTF_ID) {
eq_branch_regs = NULL;
switch (opcode) {
case BPF_JEQ:
diff --git a/kernel/capability.c b/kernel/capability.c
index 860fd22117c1..339a44dfe2f4 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -486,11 +486,11 @@ EXPORT_SYMBOL(file_ns_capable);
* Return true if the inode uid and gid are within the namespace.
*/
bool privileged_wrt_inode_uidgid(struct user_namespace *ns,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
const struct inode *inode)
{
- return vfsuid_has_mapping(ns, i_uid_into_vfsuid(mnt_userns, inode)) &&
- vfsgid_has_mapping(ns, i_gid_into_vfsgid(mnt_userns, inode));
+ return vfsuid_has_mapping(ns, i_uid_into_vfsuid(idmap, inode)) &&
+ vfsgid_has_mapping(ns, i_gid_into_vfsgid(idmap, inode));
}
/**
@@ -502,13 +502,13 @@ bool privileged_wrt_inode_uidgid(struct user_namespace *ns,
* its own user namespace and that the given inode's uid and gid are
* mapped into the current user namespace.
*/
-bool capable_wrt_inode_uidgid(struct user_namespace *mnt_userns,
+bool capable_wrt_inode_uidgid(struct mnt_idmap *idmap,
const struct inode *inode, int cap)
{
struct user_namespace *ns = current_user_ns();
return ns_capable(ns, cap) &&
- privileged_wrt_inode_uidgid(ns, mnt_userns, inode);
+ privileged_wrt_inode_uidgid(ns, idmap, inode);
}
EXPORT_SYMBOL(capable_wrt_inode_uidgid);
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index c099cf3fa02d..935e8121b21e 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -5065,7 +5065,7 @@ static int cgroup_may_write(const struct cgroup *cgrp, struct super_block *sb)
if (!inode)
return -ENOMEM;
- ret = inode_permission(&init_user_ns, inode, MAY_WRITE);
+ ret = inode_permission(&nop_mnt_idmap, inode, MAY_WRITE);
iput(inode);
return ret;
}
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index a29c0b13706b..ca826bd1eba3 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -1205,12 +1205,13 @@ void rebuild_sched_domains(void)
/**
* update_tasks_cpumask - Update the cpumasks of tasks in the cpuset.
* @cs: the cpuset in which each task's cpus_allowed mask needs to be changed
+ * @new_cpus: the temp variable for the new effective_cpus mask
*
* Iterate through each task of @cs updating its cpus_allowed to the
* effective cpuset's. As this function is called with cpuset_rwsem held,
* cpuset membership stays stable.
*/
-static void update_tasks_cpumask(struct cpuset *cs)
+static void update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus)
{
struct css_task_iter it;
struct task_struct *task;
@@ -1224,7 +1225,10 @@ static void update_tasks_cpumask(struct cpuset *cs)
if (top_cs && (task->flags & PF_KTHREAD) &&
kthread_is_per_cpu(task))
continue;
- set_cpus_allowed_ptr(task, cs->effective_cpus);
+
+ cpumask_and(new_cpus, cs->effective_cpus,
+ task_cpu_possible_mask(task));
+ set_cpus_allowed_ptr(task, new_cpus);
}
css_task_iter_end(&it);
}
@@ -1346,7 +1350,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd,
* A parent can be left with no CPU as long as there is no
* task directly associated with the parent partition.
*/
- if (!cpumask_intersects(cs->cpus_allowed, parent->effective_cpus) &&
+ if (cpumask_subset(parent->effective_cpus, cs->cpus_allowed) &&
partition_is_populated(parent, cs))
return PERR_NOCPUS;
@@ -1509,7 +1513,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd,
spin_unlock_irq(&callback_lock);
if (adding || deleting)
- update_tasks_cpumask(parent);
+ update_tasks_cpumask(parent, tmp->new_cpus);
/*
* Set or clear CS_SCHED_LOAD_BALANCE when partcmd_update, if necessary.
@@ -1661,7 +1665,7 @@ update_parent_subparts:
WARN_ON(!is_in_v2_mode() &&
!cpumask_equal(cp->cpus_allowed, cp->effective_cpus));
- update_tasks_cpumask(cp);
+ update_tasks_cpumask(cp, tmp->new_cpus);
/*
* On legacy hierarchy, if the effective cpumask of any non-
@@ -2309,7 +2313,7 @@ static int update_prstate(struct cpuset *cs, int new_prs)
}
}
- update_tasks_cpumask(parent);
+ update_tasks_cpumask(parent, tmpmask.new_cpus);
if (parent->child_ecpus_count)
update_sibling_cpumasks(parent, cs, &tmpmask);
@@ -2324,6 +2328,7 @@ out:
new_prs = -new_prs;
spin_lock_irq(&callback_lock);
cs->partition_root_state = new_prs;
+ WRITE_ONCE(cs->prs_err, err);
spin_unlock_irq(&callback_lock);
/*
* Update child cpusets, if present.
@@ -3347,7 +3352,7 @@ hotplug_update_tasks_legacy(struct cpuset *cs,
* as the tasks will be migrated to an ancestor.
*/
if (cpus_updated && !cpumask_empty(cs->cpus_allowed))
- update_tasks_cpumask(cs);
+ update_tasks_cpumask(cs, new_cpus);
if (mems_updated && !nodes_empty(cs->mems_allowed))
update_tasks_nodemask(cs);
@@ -3384,7 +3389,7 @@ hotplug_update_tasks(struct cpuset *cs,
spin_unlock_irq(&callback_lock);
if (cpus_updated)
- update_tasks_cpumask(cs);
+ update_tasks_cpumask(cs, new_cpus);
if (mems_updated)
update_tasks_nodemask(cs);
}
@@ -3691,15 +3696,38 @@ void __init cpuset_init_smp(void)
* Description: Returns the cpumask_var_t cpus_allowed of the cpuset
* attached to the specified @tsk. Guaranteed to return some non-empty
* subset of cpu_online_mask, even if this means going outside the
- * tasks cpuset.
+ * tasks cpuset, except when the task is in the top cpuset.
**/
void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
{
unsigned long flags;
+ struct cpuset *cs;
spin_lock_irqsave(&callback_lock, flags);
- guarantee_online_cpus(tsk, pmask);
+ rcu_read_lock();
+
+ cs = task_cs(tsk);
+ if (cs != &top_cpuset)
+ guarantee_online_cpus(tsk, pmask);
+ /*
+ * Tasks in the top cpuset won't get update to their cpumasks
+ * when a hotplug online/offline event happens. So we include all
+ * offline cpus in the allowed cpu list.
+ */
+ if ((cs == &top_cpuset) || cpumask_empty(pmask)) {
+ const struct cpumask *possible_mask = task_cpu_possible_mask(tsk);
+
+ /*
+ * We first exclude cpus allocated to partitions. If there is no
+ * allowable online cpu left, we fall back to all possible cpus.
+ */
+ cpumask_andnot(pmask, possible_mask, top_cpuset.subparts_cpus);
+ if (!cpumask_intersects(pmask, cpu_online_mask))
+ cpumask_copy(pmask, possible_mask);
+ }
+
+ rcu_read_unlock();
spin_unlock_irqrestore(&callback_lock, flags);
}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d56328e5080e..c4be13e50547 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4813,19 +4813,17 @@ find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx,
cpc = per_cpu_ptr(pmu->cpu_pmu_context, event->cpu);
epc = &cpc->epc;
-
+ raw_spin_lock_irq(&ctx->lock);
if (!epc->ctx) {
atomic_set(&epc->refcount, 1);
epc->embedded = 1;
- raw_spin_lock_irq(&ctx->lock);
list_add(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list);
epc->ctx = ctx;
- raw_spin_unlock_irq(&ctx->lock);
} else {
WARN_ON_ONCE(epc->ctx != ctx);
atomic_inc(&epc->refcount);
}
-
+ raw_spin_unlock_irq(&ctx->lock);
return epc;
}
@@ -4896,33 +4894,30 @@ static void free_epc_rcu(struct rcu_head *head)
static void put_pmu_ctx(struct perf_event_pmu_context *epc)
{
+ struct perf_event_context *ctx = epc->ctx;
unsigned long flags;
- if (!atomic_dec_and_test(&epc->refcount))
+ /*
+ * XXX
+ *
+ * lockdep_assert_held(&ctx->mutex);
+ *
+ * can't because of the call-site in _free_event()/put_event()
+ * which isn't always called under ctx->mutex.
+ */
+ if (!atomic_dec_and_raw_lock_irqsave(&epc->refcount, &ctx->lock, flags))
return;
- if (epc->ctx) {
- struct perf_event_context *ctx = epc->ctx;
+ WARN_ON_ONCE(list_empty(&epc->pmu_ctx_entry));
- /*
- * XXX
- *
- * lockdep_assert_held(&ctx->mutex);
- *
- * can't because of the call-site in _free_event()/put_event()
- * which isn't always called under ctx->mutex.
- */
-
- WARN_ON_ONCE(list_empty(&epc->pmu_ctx_entry));
- raw_spin_lock_irqsave(&ctx->lock, flags);
- list_del_init(&epc->pmu_ctx_entry);
- epc->ctx = NULL;
- raw_spin_unlock_irqrestore(&ctx->lock, flags);
- }
+ list_del_init(&epc->pmu_ctx_entry);
+ epc->ctx = NULL;
WARN_ON_ONCE(!list_empty(&epc->pinned_active));
WARN_ON_ONCE(!list_empty(&epc->flexible_active));
+ raw_spin_unlock_irqrestore(&ctx->lock, flags);
+
if (epc->embedded)
return;
diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh
index 473036b43c83..81b97f0f6556 100755
--- a/kernel/gen_kheaders.sh
+++ b/kernel/gen_kheaders.sh
@@ -14,6 +14,8 @@ include/
arch/$SRCARCH/include/
"
+type cpio > /dev/null
+
# Support incremental builds by skipping archive generation
# if timestamps of files being archived are not changed.
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 8fe1da9614ee..798a9042421f 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -114,7 +114,7 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode)
{
struct irqchip_fwid *fwid;
- if (WARN_ON(!is_fwnode_irqchip(fwnode)))
+ if (!fwnode || WARN_ON(!is_fwnode_irqchip(fwnode)))
return;
fwid = container_of(fwnode, struct irqchip_fwid, fwnode);
@@ -1915,7 +1915,7 @@ static void debugfs_add_domain_dir(struct irq_domain *d)
static void debugfs_remove_domain_dir(struct irq_domain *d)
{
- debugfs_remove(debugfs_lookup(d->name, domain_dir));
+ debugfs_lookup_and_remove(d->name, domain_dir);
}
void __init irq_domain_debugfs_init(struct dentry *root)
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 955267bbc2be..783a3e6a0b10 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -1000,7 +1000,7 @@ bool msi_create_device_irq_domain(struct device *dev, unsigned int domid,
fail:
msi_unlock_descs(dev);
free_fwnode:
- kfree(fwnode);
+ irq_domain_free_fwnode(fwnode);
free_bundle:
kfree(bundle);
return false;
@@ -1013,6 +1013,7 @@ free_bundle:
*/
void msi_remove_device_irq_domain(struct device *dev, unsigned int domid)
{
+ struct fwnode_handle *fwnode = NULL;
struct msi_domain_info *info;
struct irq_domain *domain;
@@ -1025,7 +1026,10 @@ void msi_remove_device_irq_domain(struct device *dev, unsigned int domid)
dev->msi.data->__domains[domid].domain = NULL;
info = domain->host_data;
+ if (irq_domain_is_msi_device(domain))
+ fwnode = domain->fwnode;
irq_domain_remove(domain);
+ irq_domain_free_fwnode(fwnode);
kfree(container_of(info, struct msi_domain_template, info));
unlock:
diff --git a/kernel/kallsyms_selftest.c b/kernel/kallsyms_selftest.c
index f35d9cc1aab1..bfbc12da3326 100644
--- a/kernel/kallsyms_selftest.c
+++ b/kernel/kallsyms_selftest.c
@@ -157,14 +157,11 @@ static void test_kallsyms_compression_ratio(void)
static int lookup_name(void *data, const char *name, struct module *mod, unsigned long addr)
{
u64 t0, t1, t;
- unsigned long flags;
struct test_stat *stat = (struct test_stat *)data;
- local_irq_save(flags);
- t0 = sched_clock();
+ t0 = ktime_get_ns();
(void)kallsyms_lookup_name(name);
- t1 = sched_clock();
- local_irq_restore(flags);
+ t1 = ktime_get_ns();
t = t1 - t0;
if (t < stat->min)
@@ -234,18 +231,15 @@ static int find_symbol(void *data, const char *name, struct module *mod, unsigne
static void test_perf_kallsyms_on_each_symbol(void)
{
u64 t0, t1;
- unsigned long flags;
struct test_stat stat;
memset(&stat, 0, sizeof(stat));
stat.max = INT_MAX;
stat.name = stub_name;
stat.perf = 1;
- local_irq_save(flags);
- t0 = sched_clock();
+ t0 = ktime_get_ns();
kallsyms_on_each_symbol(find_symbol, &stat);
- t1 = sched_clock();
- local_irq_restore(flags);
+ t1 = ktime_get_ns();
pr_info("kallsyms_on_each_symbol() traverse all: %lld ns\n", t1 - t0);
}
@@ -270,17 +264,14 @@ static int match_symbol(void *data, unsigned long addr)
static void test_perf_kallsyms_on_each_match_symbol(void)
{
u64 t0, t1;
- unsigned long flags;
struct test_stat stat;
memset(&stat, 0, sizeof(stat));
stat.max = INT_MAX;
stat.name = stub_name;
- local_irq_save(flags);
- t0 = sched_clock();
+ t0 = ktime_get_ns();
kallsyms_on_each_match_symbol(match_symbol, stat.name, &stat);
- t1 = sched_clock();
- local_irq_restore(flags);
+ t1 = ktime_get_ns();
pr_info("kallsyms_on_each_match_symbol() traverse all: %lld ns\n", t1 - t0);
}
diff --git a/kernel/kcsan/kcsan_test.c b/kernel/kcsan/kcsan_test.c
index dcec1b743c69..a60c561724be 100644
--- a/kernel/kcsan/kcsan_test.c
+++ b/kernel/kcsan/kcsan_test.c
@@ -159,7 +159,7 @@ static bool __report_matches(const struct expect_report *r)
const bool is_assert = (r->access[0].type | r->access[1].type) & KCSAN_ACCESS_ASSERT;
bool ret = false;
unsigned long flags;
- typeof(observed.lines) expect;
+ typeof(*observed.lines) *expect;
const char *end;
char *cur;
int i;
@@ -168,6 +168,10 @@ static bool __report_matches(const struct expect_report *r)
if (!report_available())
return false;
+ expect = kmalloc(sizeof(observed.lines), GFP_KERNEL);
+ if (WARN_ON(!expect))
+ return false;
+
/* Generate expected report contents. */
/* Title */
@@ -253,6 +257,7 @@ static bool __report_matches(const struct expect_report *r)
strstr(observed.lines[2], expect[1])));
out:
spin_unlock_irqrestore(&observed.lock, flags);
+ kfree(expect);
return ret;
}
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 010cf4e6d0b8..728f434de2bb 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -901,8 +901,9 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
* then we need to wake the new top waiter up to try
* to get the lock.
*/
- if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
- wake_up_state(waiter->task, waiter->wake_state);
+ top_waiter = rt_mutex_top_waiter(lock);
+ if (prerequeue_top_waiter != top_waiter)
+ wake_up_state(top_waiter->task, top_waiter->wake_state);
raw_spin_unlock_irq(&lock->wait_lock);
return 0;
}
diff --git a/kernel/module/main.c b/kernel/module/main.c
index 48568a0f5651..4ac3fe43e6c8 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -2393,7 +2393,8 @@ static bool finished_loading(const char *name)
sched_annotate_sleep();
mutex_lock(&module_mutex);
mod = find_module_all(name, strlen(name), true);
- ret = !mod || mod->state == MODULE_STATE_LIVE;
+ ret = !mod || mod->state == MODULE_STATE_LIVE
+ || mod->state == MODULE_STATE_GOING;
mutex_unlock(&module_mutex);
return ret;
@@ -2569,20 +2570,35 @@ static int add_unformed_module(struct module *mod)
mod->state = MODULE_STATE_UNFORMED;
-again:
mutex_lock(&module_mutex);
old = find_module_all(mod->name, strlen(mod->name), true);
if (old != NULL) {
- if (old->state != MODULE_STATE_LIVE) {
+ if (old->state == MODULE_STATE_COMING
+ || old->state == MODULE_STATE_UNFORMED) {
/* Wait in case it fails to load. */
mutex_unlock(&module_mutex);
err = wait_event_interruptible(module_wq,
finished_loading(mod->name));
if (err)
goto out_unlocked;
- goto again;
+
+ /* The module might have gone in the meantime. */
+ mutex_lock(&module_mutex);
+ old = find_module_all(mod->name, strlen(mod->name),
+ true);
}
- err = -EEXIST;
+
+ /*
+ * We are here only when the same module was being loaded. Do
+ * not try to load it again right now. It prevents long delays
+ * caused by serialized module load failures. It might happen
+ * when more devices of the same type trigger load of
+ * a particular module.
+ */
+ if (old && old->state == MODULE_STATE_LIVE)
+ err = -EEXIST;
+ else
+ err = -EBUSY;
goto out;
}
mod_update_bounds(mod);
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 7decf1e9c486..a5ed2e53547c 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -123,6 +123,7 @@ bool console_srcu_read_lock_is_held(void)
{
return srcu_read_lock_held(&console_srcu);
}
+EXPORT_SYMBOL(console_srcu_read_lock_is_held);
#endif
enum devkmsg_log_bits {
@@ -1891,6 +1892,7 @@ static void console_lock_spinning_enable(void)
/**
* console_lock_spinning_disable_and_check - mark end of code where another
* thread was able to busy wait and check if there is a waiter
+ * @cookie: cookie returned from console_srcu_read_lock()
*
* This is called at the end of the section where spinning is allowed.
* It has two functions. First, it is a signal that it is no longer
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 25b582b6ee5f..2a4918a1faa9 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2604,27 +2604,71 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
.user_mask = NULL,
.flags = SCA_USER, /* clear the user requested mask */
};
+ union cpumask_rcuhead {
+ cpumask_t cpumask;
+ struct rcu_head rcu;
+ };
__do_set_cpus_allowed(p, &ac);
- kfree(ac.user_mask);
+
+ /*
+ * Because this is called with p->pi_lock held, it is not possible
+ * to use kfree() here (when PREEMPT_RT=y), therefore punt to using
+ * kfree_rcu().
+ */
+ kfree_rcu((union cpumask_rcuhead *)ac.user_mask, rcu);
+}
+
+static cpumask_t *alloc_user_cpus_ptr(int node)
+{
+ /*
+ * See do_set_cpus_allowed() above for the rcu_head usage.
+ */
+ int size = max_t(int, cpumask_size(), sizeof(struct rcu_head));
+
+ return kmalloc_node(size, GFP_KERNEL, node);
}
int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src,
int node)
{
+ cpumask_t *user_mask;
unsigned long flags;
- if (!src->user_cpus_ptr)
+ /*
+ * Always clear dst->user_cpus_ptr first as their user_cpus_ptr's
+ * may differ by now due to racing.
+ */
+ dst->user_cpus_ptr = NULL;
+
+ /*
+ * This check is racy and losing the race is a valid situation.
+ * It is not worth the extra overhead of taking the pi_lock on
+ * every fork/clone.
+ */
+ if (data_race(!src->user_cpus_ptr))
return 0;
- dst->user_cpus_ptr = kmalloc_node(cpumask_size(), GFP_KERNEL, node);
- if (!dst->user_cpus_ptr)
+ user_mask = alloc_user_cpus_ptr(node);
+ if (!user_mask)
return -ENOMEM;
- /* Use pi_lock to protect content of user_cpus_ptr */
+ /*
+ * Use pi_lock to protect content of user_cpus_ptr
+ *
+ * Though unlikely, user_cpus_ptr can be reset to NULL by a concurrent
+ * do_set_cpus_allowed().
+ */
raw_spin_lock_irqsave(&src->pi_lock, flags);
- cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr);
+ if (src->user_cpus_ptr) {
+ swap(dst->user_cpus_ptr, user_mask);
+ cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr);
+ }
raw_spin_unlock_irqrestore(&src->pi_lock, flags);
+
+ if (unlikely(user_mask))
+ kfree(user_mask);
+
return 0;
}
@@ -2907,8 +2951,11 @@ static int __set_cpus_allowed_ptr_locked(struct task_struct *p,
}
if (!(ctx->flags & SCA_MIGRATE_ENABLE)) {
- if (cpumask_equal(&p->cpus_mask, ctx->new_mask))
+ if (cpumask_equal(&p->cpus_mask, ctx->new_mask)) {
+ if (ctx->flags & SCA_USER)
+ swap(p->user_cpus_ptr, ctx->user_mask);
goto out;
+ }
if (WARN_ON_ONCE(p == current &&
is_migration_disabled(p) &&
@@ -3581,6 +3628,11 @@ static inline bool rq_has_pinned_tasks(struct rq *rq)
return false;
}
+static inline cpumask_t *alloc_user_cpus_ptr(int node)
+{
+ return NULL;
+}
+
#endif /* !CONFIG_SMP */
static void
@@ -5504,7 +5556,9 @@ void scheduler_tick(void)
unsigned long thermal_pressure;
u64 resched_latency;
- arch_scale_freq_tick();
+ if (housekeeping_cpu(cpu, HK_TYPE_TICK))
+ arch_scale_freq_tick();
+
sched_clock_tick();
rq_lock(rq, &rf);
@@ -8239,12 +8293,18 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
if (retval)
goto out_put_task;
- user_mask = kmalloc(cpumask_size(), GFP_KERNEL);
- if (!user_mask) {
+ /*
+ * With non-SMP configs, user_cpus_ptr/user_mask isn't used and
+ * alloc_user_cpus_ptr() returns NULL.
+ */
+ user_mask = alloc_user_cpus_ptr(NUMA_NO_NODE);
+ if (user_mask) {
+ cpumask_copy(user_mask, in_mask);
+ } else if (IS_ENABLED(CONFIG_SMP)) {
retval = -ENOMEM;
goto out_put_task;
}
- cpumask_copy(user_mask, in_mask);
+
ac = (struct affinity_context){
.new_mask = in_mask,
.user_mask = user_mask,
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c36aa54ae071..0f8736991427 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7229,10 +7229,10 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
eenv_task_busy_time(&eenv, p, prev_cpu);
for (; pd; pd = pd->next) {
+ unsigned long util_min = p_util_min, util_max = p_util_max;
unsigned long cpu_cap, cpu_thermal_cap, util;
unsigned long cur_delta, max_spare_cap = 0;
unsigned long rq_util_min, rq_util_max;
- unsigned long util_min, util_max;
unsigned long prev_spare_cap = 0;
int max_spare_cap_cpu = -1;
unsigned long base_energy;
@@ -7251,6 +7251,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
eenv.pd_cap = 0;
for_each_cpu(cpu, cpus) {
+ struct rq *rq = cpu_rq(cpu);
+
eenv.pd_cap += cpu_thermal_cap;
if (!cpumask_test_cpu(cpu, sched_domain_span(sd)))
@@ -7269,24 +7271,19 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
* much capacity we can get out of the CPU; this is
* aligned with sched_cpu_util().
*/
- if (uclamp_is_used()) {
- if (uclamp_rq_is_idle(cpu_rq(cpu))) {
- util_min = p_util_min;
- util_max = p_util_max;
- } else {
- /*
- * Open code uclamp_rq_util_with() except for
- * the clamp() part. Ie: apply max aggregation
- * only. util_fits_cpu() logic requires to
- * operate on non clamped util but must use the
- * max-aggregated uclamp_{min, max}.
- */
- rq_util_min = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MIN);
- rq_util_max = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MAX);
-
- util_min = max(rq_util_min, p_util_min);
- util_max = max(rq_util_max, p_util_max);
- }
+ if (uclamp_is_used() && !uclamp_rq_is_idle(rq)) {
+ /*
+ * Open code uclamp_rq_util_with() except for
+ * the clamp() part. Ie: apply max aggregation
+ * only. util_fits_cpu() logic requires to
+ * operate on non clamped util but must use the
+ * max-aggregated uclamp_{min, max}.
+ */
+ rq_util_min = uclamp_rq_get(rq, UCLAMP_MIN);
+ rq_util_max = uclamp_rq_get(rq, UCLAMP_MAX);
+
+ util_min = max(rq_util_min, p_util_min);
+ util_max = max(rq_util_max, p_util_max);
}
if (!util_fits_cpu(util, util_min, util_max, cpu))
continue;
@@ -8871,16 +8868,23 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu)
* * Thermal pressure will impact all cpus in this perf domain
* equally.
*/
- if (static_branch_unlikely(&sched_asym_cpucapacity)) {
+ if (sched_energy_enabled()) {
unsigned long inv_cap = capacity_orig - thermal_load_avg(rq);
- struct perf_domain *pd = rcu_dereference(rq->rd->pd);
+ struct perf_domain *pd;
+ rcu_read_lock();
+
+ pd = rcu_dereference(rq->rd->pd);
rq->cpu_capacity_inverted = 0;
for (; pd; pd = pd->next) {
struct cpumask *pd_span = perf_domain_span(pd);
unsigned long pd_cap_orig, pd_cap;
+ /* We can't be inverted against our own pd */
+ if (cpumask_test_cpu(cpu_of(rq), pd_span))
+ continue;
+
cpu = cpumask_any(pd_span);
pd_cap_orig = arch_scale_cpu_capacity(cpu);
@@ -8905,6 +8909,8 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu)
break;
}
}
+
+ rcu_read_unlock();
}
trace_sched_cpu_capacity_tp(rq);
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index 8ac8b81bfee6..02e011cabe91 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -1343,10 +1343,11 @@ void psi_trigger_destroy(struct psi_trigger *t)
group = t->group;
/*
- * Wakeup waiters to stop polling. Can happen if cgroup is deleted
- * from under a polling process.
+ * Wakeup waiters to stop polling and clear the queue to prevent it from
+ * being accessed later. Can happen if cgroup is deleted from under a
+ * polling process.
*/
- wake_up_interruptible(&t->event_wait);
+ wake_up_pollfree(&t->event_wait);
mutex_lock(&group->trigger_lock);
diff --git a/kernel/sys.c b/kernel/sys.c
index 5fd54bf0e886..88b31f096fb2 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1442,6 +1442,8 @@ static int do_prlimit(struct task_struct *tsk, unsigned int resource,
if (resource >= RLIM_NLIMITS)
return -EINVAL;
+ resource = array_index_nospec(resource, RLIM_NLIMITS);
+
if (new_rlim) {
if (new_rlim->rlim_cur > new_rlim->rlim_max)
return -EINVAL;
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 5897828b9d7e..7e5dff602585 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -470,11 +470,35 @@ u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval)
}
EXPORT_SYMBOL_GPL(alarm_forward);
-u64 alarm_forward_now(struct alarm *alarm, ktime_t interval)
+static u64 __alarm_forward_now(struct alarm *alarm, ktime_t interval, bool throttle)
{
struct alarm_base *base = &alarm_bases[alarm->type];
+ ktime_t now = base->get_ktime();
+
+ if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && throttle) {
+ /*
+ * Same issue as with posix_timer_fn(). Timers which are
+ * periodic but the signal is ignored can starve the system
+ * with a very small interval. The real fix which was
+ * promised in the context of posix_timer_fn() never
+ * materialized, but someone should really work on it.
+ *
+ * To prevent DOS fake @now to be 1 jiffie out which keeps
+ * the overrun accounting correct but creates an
+ * inconsistency vs. timer_gettime(2).
+ */
+ ktime_t kj = NSEC_PER_SEC / HZ;
+
+ if (interval < kj)
+ now = ktime_add(now, kj);
+ }
+
+ return alarm_forward(alarm, now, interval);
+}
- return alarm_forward(alarm, base->get_ktime(), interval);
+u64 alarm_forward_now(struct alarm *alarm, ktime_t interval)
+{
+ return __alarm_forward_now(alarm, interval, false);
}
EXPORT_SYMBOL_GPL(alarm_forward_now);
@@ -551,9 +575,10 @@ static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm,
if (posix_timer_event(ptr, si_private) && ptr->it_interval) {
/*
* Handle ignored signals and rearm the timer. This will go
- * away once we handle ignored signals proper.
+ * away once we handle ignored signals proper. Ensure that
+ * small intervals cannot starve the system.
*/
- ptr->it_overrun += alarm_forward_now(alarm, ptr->it_interval);
+ ptr->it_overrun += __alarm_forward_now(alarm, ptr->it_interval, true);
++ptr->it_requeue_pending;
ptr->it_active = 1;
result = ALARMTIMER_RESTART;
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index 475ecceda768..5e2c2c26b3cc 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -18,7 +18,7 @@
#include "tick-internal.h"
/**
- * tick_program_event
+ * tick_program_event - program the CPU local timer device for the next event
*/
int tick_program_event(ktime_t expires, int force)
{
@@ -99,7 +99,7 @@ int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *))
}
/**
- * tick_check_oneshot_mode - check whether the system is in oneshot mode
+ * tick_oneshot_mode_active - check whether the system is in oneshot mode
*
* returns 1 when either nohz or highres are enabled. otherwise 0.
*/
diff --git a/kernel/time/time.c b/kernel/time/time.c
index 526257b3727c..f4198af60fee 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -462,7 +462,7 @@ struct __kernel_old_timeval ns_to_kernel_old_timeval(s64 nsec)
EXPORT_SYMBOL(ns_to_kernel_old_timeval);
/**
- * set_normalized_timespec - set timespec sec and nsec parts and normalize
+ * set_normalized_timespec64 - set timespec sec and nsec parts and normalize
*
* @ts: pointer to timespec variable to be set
* @sec: seconds to set
@@ -526,7 +526,7 @@ struct timespec64 ns_to_timespec64(s64 nsec)
EXPORT_SYMBOL(ns_to_timespec64);
/**
- * msecs_to_jiffies: - convert milliseconds to jiffies
+ * __msecs_to_jiffies: - convert milliseconds to jiffies
* @m: time in milliseconds
*
* conversion is done as follows:
@@ -541,12 +541,12 @@ EXPORT_SYMBOL(ns_to_timespec64);
* handling any 32-bit overflows.
* for the details see __msecs_to_jiffies()
*
- * msecs_to_jiffies() checks for the passed in value being a constant
+ * __msecs_to_jiffies() checks for the passed in value being a constant
* via __builtin_constant_p() allowing gcc to eliminate most of the
* code, __msecs_to_jiffies() is called if the value passed does not
* allow constant folding and the actual conversion must be done at
* runtime.
- * the _msecs_to_jiffies helpers are the HZ dependent conversion
+ * The _msecs_to_jiffies helpers are the HZ dependent conversion
* routines found in include/linux/jiffies.h
*/
unsigned long __msecs_to_jiffies(const unsigned int m)
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index f72b9f1de178..5579ead449f2 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1590,10 +1590,10 @@ void __weak read_persistent_clock64(struct timespec64 *ts)
/**
* read_persistent_wall_and_boot_offset - Read persistent clock, and also offset
* from the boot.
+ * @wall_time: current time as returned by persistent clock
+ * @boot_offset: offset that is defined as wall_time - boot_time
*
* Weak dummy function for arches that do not yet support it.
- * @wall_time: - current time as returned by persistent clock
- * @boot_offset: - offset that is defined as wall_time - boot_time
*
* The default function calculates offset based on the current value of
* local_clock(). This way architectures that support sched_clock() but don't
@@ -1701,7 +1701,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
}
#if defined(CONFIG_PM_SLEEP) && defined(CONFIG_RTC_HCTOSYS_DEVICE)
-/**
+/*
* We have three kinds of time sources to use for sleep time
* injection, the preference order is:
* 1) non-stop clocksource
@@ -1722,7 +1722,7 @@ bool timekeeping_rtc_skipresume(void)
return !suspend_timing_needed;
}
-/**
+/*
* 1) can be determined whether to use or not only when doing
* timekeeping_resume() which is invoked after rtc_suspend(),
* so we can't skip rtc_suspend() surely if system has 1).
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 197545241ab8..d7043043f59c 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -933,8 +933,8 @@ config RING_BUFFER_RECORD_RECURSION
default y
help
The ring buffer has its own internal recursion. Although when
- recursion happens it wont cause harm because of the protection,
- but it does cause an unwanted overhead. Enabling this option will
+ recursion happens it won't cause harm because of the protection,
+ but it does cause unwanted overhead. Enabling this option will
place where recursion was detected into the ftrace "recursed_functions"
file.
@@ -1017,8 +1017,8 @@ config RING_BUFFER_STARTUP_TEST
The test runs for 10 seconds. This will slow your boot time
by at least 10 more seconds.
- At the end of the test, statics and more checks are done.
- It will output the stats of each per cpu buffer. What
+ At the end of the test, statistics and more checks are done.
+ It will output the stats of each per cpu buffer: What
was written, the sizes, what was read, what was lost, and
other similar details.
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 3bbd3f0c810c..c09792c551bf 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -833,6 +833,7 @@ static void do_bpf_send_signal(struct irq_work *entry)
work = container_of(entry, struct send_signal_irq_work, irq_work);
group_send_sig_info(work->sig, SEND_SIG_PRIV, work->task, work->type);
+ put_task_struct(work->task);
}
static int bpf_send_signal_common(u32 sig, enum pid_type type)
@@ -848,6 +849,9 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type)
return -EPERM;
if (unlikely(!nmi_uaccess_okay()))
return -EPERM;
+ /* Task should not be pid=1 to avoid kernel panic. */
+ if (unlikely(is_global_init(current)))
+ return -EPERM;
if (irqs_disabled()) {
/* Do an early check on signal validity. Otherwise,
@@ -864,7 +868,7 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type)
* to the irq_work. The current task may change when queued
* irq works get executed.
*/
- work->task = current;
+ work->task = get_task_struct(current);
work->sig = sig;
work->type = type;
irq_work_queue(&work->irq_work);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 442438b93fe9..750aa3f08b25 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1248,12 +1248,17 @@ static void free_ftrace_hash_rcu(struct ftrace_hash *hash)
call_rcu(&hash->rcu, __free_ftrace_hash_rcu);
}
+/**
+ * ftrace_free_filter - remove all filters for an ftrace_ops
+ * @ops - the ops to remove the filters from
+ */
void ftrace_free_filter(struct ftrace_ops *ops)
{
ftrace_ops_init(ops);
free_ftrace_hash(ops->func_hash->filter_hash);
free_ftrace_hash(ops->func_hash->notrace_hash);
}
+EXPORT_SYMBOL_GPL(ftrace_free_filter);
static struct ftrace_hash *alloc_ftrace_hash(int size_bits)
{
@@ -5839,6 +5844,10 @@ EXPORT_SYMBOL_GPL(modify_ftrace_direct_multi);
*
* Filters denote which functions should be enabled when tracing is enabled
* If @ip is NULL, it fails to update filter.
+ *
+ * This can allocate memory which must be freed before @ops can be freed,
+ * either by removing each filtered addr or by using
+ * ftrace_free_filter(@ops).
*/
int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip,
int remove, int reset)
@@ -5858,7 +5867,11 @@ EXPORT_SYMBOL_GPL(ftrace_set_filter_ip);
*
* Filters denote which functions should be enabled when tracing is enabled
* If @ips array or any ip specified within is NULL , it fails to update filter.
- */
+ *
+ * This can allocate memory which must be freed before @ops can be freed,
+ * either by removing each filtered addr or by using
+ * ftrace_free_filter(@ops).
+*/
int ftrace_set_filter_ips(struct ftrace_ops *ops, unsigned long *ips,
unsigned int cnt, int remove, int reset)
{
@@ -5900,6 +5913,10 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
*
* Filters denote which functions should be enabled when tracing is enabled.
* If @buf is NULL and reset is set, all functions will be enabled for tracing.
+ *
+ * This can allocate memory which must be freed before @ops can be freed,
+ * either by removing each filtered addr or by using
+ * ftrace_free_filter(@ops).
*/
int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf,
int len, int reset)
@@ -5919,6 +5936,10 @@ EXPORT_SYMBOL_GPL(ftrace_set_filter);
* Notrace Filters denote which functions should not be enabled when tracing
* is enabled. If @buf is NULL and reset is set, all functions will be enabled
* for tracing.
+ *
+ * This can allocate memory which must be freed before @ops can be freed,
+ * either by removing each filtered addr or by using
+ * ftrace_free_filter(@ops).
*/
int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf,
int len, int reset)
diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c
index 6c97cc2d754a..7e9061828c24 100644
--- a/kernel/trace/rv/rv.c
+++ b/kernel/trace/rv/rv.c
@@ -516,7 +516,7 @@ static ssize_t enabled_monitors_write(struct file *filp, const char __user *user
struct rv_monitor_def *mdef;
int retval = -EINVAL;
bool enable = true;
- char *ptr = buff;
+ char *ptr;
int len;
if (count < 1 || count > MAX_RV_MONITOR_NAME_SIZE + 1)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a555a861b978..c9e40f692650 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -9148,9 +9148,6 @@ buffer_percent_write(struct file *filp, const char __user *ubuf,
if (val > 100)
return -EINVAL;
- if (!val)
- val = 1;
-
tr->buffer_percent = val;
(*ppos)++;
@@ -10295,6 +10292,8 @@ void __init early_trace_init(void)
static_key_enable(&tracepoint_printk_key.key);
}
tracer_alloc_buffers();
+
+ init_events();
}
void __init trace_init(void)
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index e46a49269be2..085a31b978a5 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1282,6 +1282,7 @@ struct ftrace_event_field {
int offset;
int size;
int is_signed;
+ int len;
};
struct prog_entry;
@@ -1490,6 +1491,7 @@ extern void trace_event_enable_cmd_record(bool enable);
extern void trace_event_enable_tgid_record(bool enable);
extern int event_trace_init(void);
+extern int init_events(void);
extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr);
extern int event_trace_del_tracer(struct trace_array *tr);
extern void __trace_early_add_events(struct trace_array *tr);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 33e0b4f8ebe6..6a942fa275c7 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -114,7 +114,7 @@ trace_find_event_field(struct trace_event_call *call, char *name)
static int __trace_define_field(struct list_head *head, const char *type,
const char *name, int offset, int size,
- int is_signed, int filter_type)
+ int is_signed, int filter_type, int len)
{
struct ftrace_event_field *field;
@@ -133,6 +133,7 @@ static int __trace_define_field(struct list_head *head, const char *type,
field->offset = offset;
field->size = size;
field->is_signed = is_signed;
+ field->len = len;
list_add(&field->link, head);
@@ -150,14 +151,28 @@ int trace_define_field(struct trace_event_call *call, const char *type,
head = trace_get_fields(call);
return __trace_define_field(head, type, name, offset, size,
- is_signed, filter_type);
+ is_signed, filter_type, 0);
}
EXPORT_SYMBOL_GPL(trace_define_field);
+static int trace_define_field_ext(struct trace_event_call *call, const char *type,
+ const char *name, int offset, int size, int is_signed,
+ int filter_type, int len)
+{
+ struct list_head *head;
+
+ if (WARN_ON(!call->class))
+ return 0;
+
+ head = trace_get_fields(call);
+ return __trace_define_field(head, type, name, offset, size,
+ is_signed, filter_type, len);
+}
+
#define __generic_field(type, item, filter_type) \
ret = __trace_define_field(&ftrace_generic_fields, #type, \
#item, 0, 0, is_signed_type(type), \
- filter_type); \
+ filter_type, 0); \
if (ret) \
return ret;
@@ -166,7 +181,7 @@ EXPORT_SYMBOL_GPL(trace_define_field);
"common_" #item, \
offsetof(typeof(ent), item), \
sizeof(ent.item), \
- is_signed_type(type), FILTER_OTHER); \
+ is_signed_type(type), FILTER_OTHER, 0); \
if (ret) \
return ret;
@@ -1588,12 +1603,17 @@ static int f_show(struct seq_file *m, void *v)
seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
field->type, field->name, field->offset,
field->size, !!field->is_signed);
- else
- seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
+ else if (field->len)
+ seq_printf(m, "\tfield:%.*s %s[%d];\toffset:%u;\tsize:%u;\tsigned:%d;\n",
(int)(array_descriptor - field->type),
field->type, field->name,
- array_descriptor, field->offset,
+ field->len, field->offset,
field->size, !!field->is_signed);
+ else
+ seq_printf(m, "\tfield:%.*s %s[];\toffset:%u;\tsize:%u;\tsigned:%d;\n",
+ (int)(array_descriptor - field->type),
+ field->type, field->name,
+ field->offset, field->size, !!field->is_signed);
return 0;
}
@@ -2379,9 +2399,10 @@ event_define_fields(struct trace_event_call *call)
}
offset = ALIGN(offset, field->align);
- ret = trace_define_field(call, field->type, field->name,
+ ret = trace_define_field_ext(call, field->type, field->name,
offset, field->size,
- field->is_signed, field->filter_type);
+ field->is_signed, field->filter_type,
+ field->len);
if (WARN_ON_ONCE(ret)) {
pr_err("error code is %d\n", ret);
break;
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 96acc2b71ac7..e095c3b3a50d 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -128,7 +128,7 @@ static bool is_not(const char *str)
}
/**
- * prog_entry - a singe entry in the filter program
+ * struct prog_entry - a singe entry in the filter program
* @target: Index to jump to on a branch (actually one minus the index)
* @when_to_branch: The value of the result of the predicate to do a branch
* @pred: The predicate to execute.
@@ -140,16 +140,16 @@ struct prog_entry {
};
/**
- * update_preds- assign a program entry a label target
+ * update_preds - assign a program entry a label target
* @prog: The program array
* @N: The index of the current entry in @prog
- * @when_to_branch: What to assign a program entry for its branch condition
+ * @invert: What to assign a program entry for its branch condition
*
* The program entry at @N has a target that points to the index of a program
* entry that can have its target and when_to_branch fields updated.
* Update the current program entry denoted by index @N target field to be
* that of the updated entry. This will denote the entry to update if
- * we are processing an "||" after an "&&"
+ * we are processing an "||" after an "&&".
*/
static void update_preds(struct prog_entry *prog, int N, int invert)
{
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index fcaf226b7744..5edbf6b1da3f 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -1988,6 +1988,8 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
hist_field->fn_num = flags & HIST_FIELD_FL_LOG2 ? HIST_FIELD_FN_LOG2 :
HIST_FIELD_FN_BUCKET;
hist_field->operands[0] = create_hist_field(hist_data, field, fl, NULL);
+ if (!hist_field->operands[0])
+ goto free;
hist_field->size = hist_field->operands[0]->size;
hist_field->type = kstrdup_const(hist_field->operands[0]->type, GFP_KERNEL);
if (!hist_field->type)
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index d960f6b11b5e..58f3946081e2 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -111,7 +111,8 @@ static void __always_unused ____ftrace_check_##name(void) \
#define __array(_type, _item, _len) { \
.type = #_type"["__stringify(_len)"]", .name = #_item, \
.size = sizeof(_type[_len]), .align = __alignof__(_type), \
- is_signed_type(_type), .filter_type = FILTER_OTHER },
+ is_signed_type(_type), .filter_type = FILTER_OTHER, \
+ .len = _len },
#undef __array_desc
#define __array_desc(_type, _container, _item, _len) __array(_type, _item, _len)
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index 94c1b5eb1dc0..210e1f168392 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -147,9 +147,8 @@ static void osnoise_unregister_instance(struct trace_array *tr)
* register/unregister serialization is provided by trace's
* trace_types_lock.
*/
- lockdep_assert_held(&trace_types_lock);
-
- list_for_each_entry_rcu(inst, &osnoise_instances, list) {
+ list_for_each_entry_rcu(inst, &osnoise_instances, list,
+ lockdep_is_held(&trace_types_lock)) {
if (inst->tr == tr) {
list_del_rcu(&inst->list);
found = 1;
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 57a13b61f186..bd475a00f96d 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1535,7 +1535,7 @@ static struct trace_event *events[] __initdata = {
NULL
};
-__init static int init_events(void)
+__init int init_events(void)
{
struct trace_event *event;
int i, ret;
@@ -1548,4 +1548,3 @@ __init static int init_events(void)
return 0;
}
-early_initcall(init_events);
diff --git a/kernel/umh.c b/kernel/umh.c
index 850631518665..fbf872c624cb 100644
--- a/kernel/umh.c
+++ b/kernel/umh.c
@@ -438,21 +438,27 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
if (wait == UMH_NO_WAIT) /* task has freed sub_info */
goto unlock;
- if (wait & UMH_KILLABLE)
- state |= TASK_KILLABLE;
-
if (wait & UMH_FREEZABLE)
state |= TASK_FREEZABLE;
- retval = wait_for_completion_state(&done, state);
- if (!retval)
- goto wait_done;
-
if (wait & UMH_KILLABLE) {
+ retval = wait_for_completion_state(&done, state | TASK_KILLABLE);
+ if (!retval)
+ goto wait_done;
+
/* umh_complete() will see NULL and free sub_info */
if (xchg(&sub_info->complete, NULL))
goto unlock;
+
+ /*
+ * fallthrough; in case of -ERESTARTSYS now do uninterruptible
+ * wait_for_completion_state(). Since umh_complete() shall call
+ * complete() in a moment if xchg() above returned NULL, this
+ * uninterruptible wait_for_completion_state() will not block
+ * SIGKILL'ed processes for long.
+ */
}
+ wait_for_completion_state(&done, state);
wait_done:
retval = sub_info->retval;
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 881c3f84e88a..02ee440f7be3 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -754,6 +754,7 @@ config DEBUG_KMEMLEAK
select KALLSYMS
select CRC32
select STACKDEPOT
+ select STACKDEPOT_ALWAYS_INIT if !DEBUG_KMEMLEAK_DEFAULT_OFF
help
Say Y here if you want to enable the memory leak
detector. The memory allocation/freeing is traced in a way
@@ -1207,7 +1208,7 @@ config SCHED_DEBUG
depends on DEBUG_KERNEL && PROC_FS
default y
help
- If you say Y here, the /proc/sched_debug file will be provided
+ If you say Y here, the /sys/kernel/debug/sched file will be provided
that can help debug the scheduler. The runtime overhead of this
option is minimal.
@@ -1917,7 +1918,7 @@ config FUNCTION_ERROR_INJECTION
help
Add fault injections into various functions that are annotated with
ALLOW_ERROR_INJECTION() in the kernel. BPF may also modify the return
- value of theses functions. This is useful to test error paths of code.
+ value of these functions. This is useful to test error paths of code.
If unsure, say N
@@ -2566,6 +2567,15 @@ config MEMCPY_KUNIT_TEST
If unsure, say N.
+config MEMCPY_SLOW_KUNIT_TEST
+ bool "Include exhaustive memcpy tests"
+ depends on MEMCPY_KUNIT_TEST
+ default y
+ help
+ Some memcpy tests are quite exhaustive in checking for overlaps
+ and bit ranges. These can be very slow, so they are split out
+ as a separate config, in case they need to be disabled.
+
config IS_SIGNED_TYPE_KUNIT_TEST
tristate "Test is_signed_type() macro" if !KUNIT_ALL_TESTS
depends on KUNIT
diff --git a/lib/Kconfig.kcsan b/lib/Kconfig.kcsan
index 375575a5a0e3..4dedd61e5192 100644
--- a/lib/Kconfig.kcsan
+++ b/lib/Kconfig.kcsan
@@ -194,7 +194,7 @@ config KCSAN_WEAK_MEMORY
Enable support for modeling a subset of weak memory, which allows
detecting a subset of data races due to missing memory barriers.
- Depends on KCSAN_STRICT, because the options strenghtening certain
+ Depends on KCSAN_STRICT, because the options strengthening certain
plain accesses by default (depending on !KCSAN_STRICT) reduce the
ability to detect any data races invoving reordered accesses, in
particular reordered writes.
diff --git a/lib/dec_and_lock.c b/lib/dec_and_lock.c
index 9555b68bb774..1dcca8f2e194 100644
--- a/lib/dec_and_lock.c
+++ b/lib/dec_and_lock.c
@@ -49,3 +49,34 @@ int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock,
return 0;
}
EXPORT_SYMBOL(_atomic_dec_and_lock_irqsave);
+
+int _atomic_dec_and_raw_lock(atomic_t *atomic, raw_spinlock_t *lock)
+{
+ /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
+ if (atomic_add_unless(atomic, -1, 1))
+ return 0;
+
+ /* Otherwise do it the slow way */
+ raw_spin_lock(lock);
+ if (atomic_dec_and_test(atomic))
+ return 1;
+ raw_spin_unlock(lock);
+ return 0;
+}
+EXPORT_SYMBOL(_atomic_dec_and_raw_lock);
+
+int _atomic_dec_and_raw_lock_irqsave(atomic_t *atomic, raw_spinlock_t *lock,
+ unsigned long *flags)
+{
+ /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
+ if (atomic_add_unless(atomic, -1, 1))
+ return 0;
+
+ /* Otherwise do it the slow way */
+ raw_spin_lock_irqsave(lock, *flags);
+ if (atomic_dec_and_test(atomic))
+ return 1;
+ raw_spin_unlock_irqrestore(lock, *flags);
+ return 0;
+}
+EXPORT_SYMBOL(_atomic_dec_and_raw_lock_irqsave);
diff --git a/lib/kunit/assert.c b/lib/kunit/assert.c
index f5b50babe38d..05a09652f5a1 100644
--- a/lib/kunit/assert.c
+++ b/lib/kunit/assert.c
@@ -241,24 +241,34 @@ void kunit_mem_assert_format(const struct kunit_assert *assert,
mem_assert = container_of(assert, struct kunit_mem_assert,
assert);
- string_stream_add(stream,
- KUNIT_SUBTEST_INDENT "Expected %s %s %s, but\n",
- mem_assert->text->left_text,
- mem_assert->text->operation,
- mem_assert->text->right_text);
+ if (!mem_assert->left_value) {
+ string_stream_add(stream,
+ KUNIT_SUBTEST_INDENT "Expected %s is not null, but is\n",
+ mem_assert->text->left_text);
+ } else if (!mem_assert->right_value) {
+ string_stream_add(stream,
+ KUNIT_SUBTEST_INDENT "Expected %s is not null, but is\n",
+ mem_assert->text->right_text);
+ } else {
+ string_stream_add(stream,
+ KUNIT_SUBTEST_INDENT "Expected %s %s %s, but\n",
+ mem_assert->text->left_text,
+ mem_assert->text->operation,
+ mem_assert->text->right_text);
- string_stream_add(stream, KUNIT_SUBSUBTEST_INDENT "%s ==\n",
- mem_assert->text->left_text);
- kunit_assert_hexdump(stream, mem_assert->left_value,
- mem_assert->right_value, mem_assert->size);
+ string_stream_add(stream, KUNIT_SUBSUBTEST_INDENT "%s ==\n",
+ mem_assert->text->left_text);
+ kunit_assert_hexdump(stream, mem_assert->left_value,
+ mem_assert->right_value, mem_assert->size);
- string_stream_add(stream, "\n");
+ string_stream_add(stream, "\n");
- string_stream_add(stream, KUNIT_SUBSUBTEST_INDENT "%s ==\n",
- mem_assert->text->right_text);
- kunit_assert_hexdump(stream, mem_assert->right_value,
- mem_assert->left_value, mem_assert->size);
+ string_stream_add(stream, KUNIT_SUBSUBTEST_INDENT "%s ==\n",
+ mem_assert->text->right_text);
+ kunit_assert_hexdump(stream, mem_assert->right_value,
+ mem_assert->left_value, mem_assert->size);
- kunit_assert_print_msg(message, stream);
+ kunit_assert_print_msg(message, stream);
+ }
}
EXPORT_SYMBOL_GPL(kunit_mem_assert_format);
diff --git a/lib/kunit/test.c b/lib/kunit/test.c
index c9ebf975e56b..890ba5b3a981 100644
--- a/lib/kunit/test.c
+++ b/lib/kunit/test.c
@@ -21,6 +21,7 @@
#include "try-catch-impl.h"
DEFINE_STATIC_KEY_FALSE(kunit_running);
+EXPORT_SYMBOL_GPL(kunit_running);
#if IS_BUILTIN(CONFIG_KUNIT)
/*
diff --git a/lib/lockref.c b/lib/lockref.c
index 45e93ece8ba0..2afe4c5d8919 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
@@ -23,7 +23,6 @@
} \
if (!--retry) \
break; \
- cpu_relax(); \
} \
} while (0)
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index 26e2045d3cda..5a976393c9ae 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -670,12 +670,13 @@ static inline unsigned long mte_pivot(const struct maple_enode *mn,
unsigned char piv)
{
struct maple_node *node = mte_to_node(mn);
+ enum maple_type type = mte_node_type(mn);
- if (piv >= mt_pivots[piv]) {
+ if (piv >= mt_pivots[type]) {
WARN_ON(1);
return 0;
}
- switch (mte_node_type(mn)) {
+ switch (type) {
case maple_arange_64:
return node->ma64.pivot[piv];
case maple_range_64:
@@ -4887,7 +4888,7 @@ static bool mas_rev_awalk(struct ma_state *mas, unsigned long size)
unsigned long *pivots, *gaps;
void __rcu **slots;
unsigned long gap = 0;
- unsigned long max, min, index;
+ unsigned long max, min;
unsigned char offset;
if (unlikely(mas_is_err(mas)))
@@ -4909,8 +4910,7 @@ static bool mas_rev_awalk(struct ma_state *mas, unsigned long size)
min = mas_safe_min(mas, pivots, --offset);
max = mas_safe_pivot(mas, pivots, offset, type);
- index = mas->index;
- while (index <= max) {
+ while (mas->index <= max) {
gap = 0;
if (gaps)
gap = gaps[offset];
@@ -4941,10 +4941,8 @@ static bool mas_rev_awalk(struct ma_state *mas, unsigned long size)
min = mas_safe_min(mas, pivots, offset);
}
- if (unlikely(index > max)) {
- mas_set_err(mas, -EBUSY);
- return false;
- }
+ if (unlikely((mas->index > max) || (size - 1 > max - mas->index)))
+ goto no_space;
if (unlikely(ma_is_leaf(type))) {
mas->offset = offset;
@@ -4961,9 +4959,11 @@ static bool mas_rev_awalk(struct ma_state *mas, unsigned long size)
return false;
ascend:
- if (mte_is_root(mas->node))
- mas_set_err(mas, -EBUSY);
+ if (!mte_is_root(mas->node))
+ return false;
+no_space:
+ mas_set_err(mas, -EBUSY);
return false;
}
diff --git a/lib/memcpy_kunit.c b/lib/memcpy_kunit.c
index 89128551448d..887926f04731 100644
--- a/lib/memcpy_kunit.c
+++ b/lib/memcpy_kunit.c
@@ -309,6 +309,8 @@ static void set_random_nonzero(struct kunit *test, u8 *byte)
static void init_large(struct kunit *test)
{
+ if (!IS_ENABLED(CONFIG_MEMCPY_SLOW_KUNIT_TEST))
+ kunit_skip(test, "Slow test skipped. Enable with CONFIG_MEMCPY_SLOW_KUNIT_TEST=y");
/* Get many bit patterns. */
get_random_bytes(large_src, ARRAY_SIZE(large_src));
diff --git a/lib/nlattr.c b/lib/nlattr.c
index 9055e8b4d144..489e15bde5c1 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -10,6 +10,7 @@
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/jiffies.h>
+#include <linux/nospec.h>
#include <linux/skbuff.h>
#include <linux/string.h>
#include <linux/types.h>
@@ -381,6 +382,7 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
if (type <= 0 || type > maxtype)
return 0;
+ type = array_index_nospec(type, maxtype + 1);
pt = &policy[type];
BUG_ON(pt->type > NLA_TYPE_MAX);
@@ -596,6 +598,7 @@ static int __nla_validate_parse(const struct nlattr *head, int len, int maxtype,
}
continue;
}
+ type = array_index_nospec(type, maxtype + 1);
if (policy) {
int err = validate_nla(nla, maxtype, policy,
validate, extack, depth);
diff --git a/lib/parser.c b/lib/parser.c
index bcb23484100e..2b5e2b480253 100644
--- a/lib/parser.c
+++ b/lib/parser.c
@@ -11,6 +11,15 @@
#include <linux/slab.h>
#include <linux/string.h>
+/*
+ * max size needed by different bases to express U64
+ * HEX: "0xFFFFFFFFFFFFFFFF" --> 18
+ * DEC: "18446744073709551615" --> 20
+ * OCT: "01777777777777777777777" --> 23
+ * pick the max one to define NUMBER_BUF_LEN
+ */
+#define NUMBER_BUF_LEN 24
+
/**
* match_one - Determines if a string matches a simple pattern
* @s: the string to examine for presence of the pattern
@@ -129,14 +138,12 @@ EXPORT_SYMBOL(match_token);
static int match_number(substring_t *s, int *result, int base)
{
char *endp;
- char *buf;
+ char buf[NUMBER_BUF_LEN];
int ret;
long val;
- buf = match_strdup(s);
- if (!buf)
- return -ENOMEM;
-
+ if (match_strlcpy(buf, s, NUMBER_BUF_LEN) >= NUMBER_BUF_LEN)
+ return -ERANGE;
ret = 0;
val = simple_strtol(buf, &endp, base);
if (endp == buf)
@@ -145,7 +152,6 @@ static int match_number(substring_t *s, int *result, int base)
ret = -ERANGE;
else
*result = (int) val;
- kfree(buf);
return ret;
}
@@ -163,18 +169,15 @@ static int match_number(substring_t *s, int *result, int base)
*/
static int match_u64int(substring_t *s, u64 *result, int base)
{
- char *buf;
+ char buf[NUMBER_BUF_LEN];
int ret;
u64 val;
- buf = match_strdup(s);
- if (!buf)
- return -ENOMEM;
-
+ if (match_strlcpy(buf, s, NUMBER_BUF_LEN) >= NUMBER_BUF_LEN)
+ return -ERANGE;
ret = kstrtoull(buf, base, &val);
if (!ret)
*result = val;
- kfree(buf);
return ret;
}
@@ -206,14 +209,12 @@ EXPORT_SYMBOL(match_int);
*/
int match_uint(substring_t *s, unsigned int *result)
{
- int err = -ENOMEM;
- char *buf = match_strdup(s);
+ char buf[NUMBER_BUF_LEN];
- if (buf) {
- err = kstrtouint(buf, 10, result);
- kfree(buf);
- }
- return err;
+ if (match_strlcpy(buf, s, NUMBER_BUF_LEN) >= NUMBER_BUF_LEN)
+ return -ERANGE;
+
+ return kstrtouint(buf, 10, result);
}
EXPORT_SYMBOL(match_uint);
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index a0ad2a7959b5..8d7519a8f308 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -470,22 +470,27 @@ int sg_alloc_append_table_from_pages(struct sg_append_table *sgt_append,
return -EOPNOTSUPP;
if (sgt_append->prv) {
+ unsigned long next_pfn = (page_to_phys(sg_page(sgt_append->prv)) +
+ sgt_append->prv->offset + sgt_append->prv->length) / PAGE_SIZE;
+
if (WARN_ON(offset))
return -EINVAL;
/* Merge contiguous pages into the last SG */
prv_len = sgt_append->prv->length;
- last_pg = sg_page(sgt_append->prv);
- while (n_pages && pages_are_mergeable(last_pg, pages[0])) {
- if (sgt_append->prv->length + PAGE_SIZE > max_segment)
- break;
- sgt_append->prv->length += PAGE_SIZE;
- last_pg = pages[0];
- pages++;
- n_pages--;
+ if (page_to_pfn(pages[0]) == next_pfn) {
+ last_pg = pfn_to_page(next_pfn - 1);
+ while (n_pages && pages_are_mergeable(pages[0], last_pg)) {
+ if (sgt_append->prv->length + PAGE_SIZE > max_segment)
+ break;
+ sgt_append->prv->length += PAGE_SIZE;
+ last_pg = pages[0];
+ pages++;
+ n_pages--;
+ }
+ if (!n_pages)
+ goto out;
}
- if (!n_pages)
- goto out;
}
/* compute number of contiguous chunks */
diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c
index 497fc93ccf9e..ec847bf4dcb4 100644
--- a/lib/test_maple_tree.c
+++ b/lib/test_maple_tree.c
@@ -2517,6 +2517,91 @@ static noinline void check_bnode_min_spanning(struct maple_tree *mt)
mt_set_non_kernel(0);
}
+static noinline void check_empty_area_window(struct maple_tree *mt)
+{
+ unsigned long i, nr_entries = 20;
+ MA_STATE(mas, mt, 0, 0);
+
+ for (i = 1; i <= nr_entries; i++)
+ mtree_store_range(mt, i*10, i*10 + 9,
+ xa_mk_value(i), GFP_KERNEL);
+
+ /* Create another hole besides the one at 0 */
+ mtree_store_range(mt, 160, 169, NULL, GFP_KERNEL);
+
+ /* Check lower bounds that don't fit */
+ rcu_read_lock();
+ MT_BUG_ON(mt, mas_empty_area_rev(&mas, 5, 90, 10) != -EBUSY);
+
+ mas_reset(&mas);
+ MT_BUG_ON(mt, mas_empty_area_rev(&mas, 6, 90, 5) != -EBUSY);
+
+ /* Check lower bound that does fit */
+ mas_reset(&mas);
+ MT_BUG_ON(mt, mas_empty_area_rev(&mas, 5, 90, 5) != 0);
+ MT_BUG_ON(mt, mas.index != 5);
+ MT_BUG_ON(mt, mas.last != 9);
+ rcu_read_unlock();
+
+ /* Check one gap that doesn't fit and one that does */
+ rcu_read_lock();
+ mas_reset(&mas);
+ MT_BUG_ON(mt, mas_empty_area_rev(&mas, 5, 217, 9) != 0);
+ MT_BUG_ON(mt, mas.index != 161);
+ MT_BUG_ON(mt, mas.last != 169);
+
+ /* Check one gap that does fit above the min */
+ mas_reset(&mas);
+ MT_BUG_ON(mt, mas_empty_area_rev(&mas, 100, 218, 3) != 0);
+ MT_BUG_ON(mt, mas.index != 216);
+ MT_BUG_ON(mt, mas.last != 218);
+
+ /* Check size that doesn't fit any gap */
+ mas_reset(&mas);
+ MT_BUG_ON(mt, mas_empty_area_rev(&mas, 100, 218, 16) != -EBUSY);
+
+ /*
+ * Check size that doesn't fit the lower end of the window but
+ * does fit the gap
+ */
+ mas_reset(&mas);
+ MT_BUG_ON(mt, mas_empty_area_rev(&mas, 167, 200, 4) != -EBUSY);
+
+ /*
+ * Check size that doesn't fit the upper end of the window but
+ * does fit the gap
+ */
+ mas_reset(&mas);
+ MT_BUG_ON(mt, mas_empty_area_rev(&mas, 100, 162, 4) != -EBUSY);
+
+ /* Check mas_empty_area forward */
+ mas_reset(&mas);
+ MT_BUG_ON(mt, mas_empty_area(&mas, 0, 100, 9) != 0);
+ MT_BUG_ON(mt, mas.index != 0);
+ MT_BUG_ON(mt, mas.last != 8);
+
+ mas_reset(&mas);
+ MT_BUG_ON(mt, mas_empty_area(&mas, 0, 100, 4) != 0);
+ MT_BUG_ON(mt, mas.index != 0);
+ MT_BUG_ON(mt, mas.last != 3);
+
+ mas_reset(&mas);
+ MT_BUG_ON(mt, mas_empty_area(&mas, 0, 100, 11) != -EBUSY);
+
+ mas_reset(&mas);
+ MT_BUG_ON(mt, mas_empty_area(&mas, 5, 100, 6) != -EBUSY);
+
+ mas_reset(&mas);
+ MT_BUG_ON(mt, mas_empty_area(&mas, 0, 8, 10) != -EBUSY);
+
+ mas_reset(&mas);
+ mas_empty_area(&mas, 100, 165, 3);
+
+ mas_reset(&mas);
+ MT_BUG_ON(mt, mas_empty_area(&mas, 100, 163, 6) != -EBUSY);
+ rcu_read_unlock();
+}
+
static DEFINE_MTREE(tree);
static int maple_tree_seed(void)
{
@@ -2765,6 +2850,10 @@ static int maple_tree_seed(void)
check_bnode_min_spanning(&tree);
mtree_destroy(&tree);
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ check_empty_area_window(&tree);
+ mtree_destroy(&tree);
+
#if defined(BENCH)
skip:
#endif
diff --git a/lib/win_minmax.c b/lib/win_minmax.c
index 6bdc1cd15f76..ec10506834b6 100644
--- a/lib/win_minmax.c
+++ b/lib/win_minmax.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/**
+/*
* lib/minmax.c: windowed min/max tracker
*
* Kathleen Nichols' algorithm for tracking the minimum (or maximum)
diff --git a/mm/compaction.c b/mm/compaction.c
index ca1603524bbe..8238e83385a7 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1839,6 +1839,7 @@ static unsigned long fast_find_migrateblock(struct compact_control *cc)
pfn = cc->zone->zone_start_pfn;
cc->fast_search_fail = 0;
found_block = true;
+ set_pageblock_skip(freepage);
break;
}
}
diff --git a/mm/filemap.c b/mm/filemap.c
index c4d4ace9cc70..0e20a8d6dd93 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2588,18 +2588,19 @@ static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter,
struct folio *folio;
int err = 0;
+ /* "last_index" is the index of the page beyond the end of the read */
last_index = DIV_ROUND_UP(iocb->ki_pos + iter->count, PAGE_SIZE);
retry:
if (fatal_signal_pending(current))
return -EINTR;
- filemap_get_read_batch(mapping, index, last_index, fbatch);
+ filemap_get_read_batch(mapping, index, last_index - 1, fbatch);
if (!folio_batch_count(fbatch)) {
if (iocb->ki_flags & IOCB_NOIO)
return -EAGAIN;
page_cache_sync_readahead(mapping, ra, filp, index,
last_index - index);
- filemap_get_read_batch(mapping, index, last_index, fbatch);
+ filemap_get_read_batch(mapping, index, last_index - 1, fbatch);
}
if (!folio_batch_count(fbatch)) {
if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_WAITQ))
diff --git a/mm/gup.c b/mm/gup.c
index f45a3a5be53a..7c034514ddd8 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1914,7 +1914,7 @@ static unsigned long collect_longterm_unpinnable_pages(
drain_allow = false;
}
- if (!folio_isolate_lru(folio))
+ if (folio_isolate_lru(folio))
continue;
list_add_tail(&folio->lru, movable_page_list);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index abe6cfd92ffa..1b791b26d72d 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3272,8 +3272,6 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
pmde = mk_huge_pmd(new, READ_ONCE(vma->vm_page_prot));
if (pmd_swp_soft_dirty(*pvmw->pmd))
pmde = pmd_mksoft_dirty(pmde);
- if (is_writable_migration_entry(entry))
- pmde = maybe_pmd_mkwrite(pmde, vma);
if (pmd_swp_uffd_wp(*pvmw->pmd))
pmde = pmd_wrprotect(pmd_mkuffd_wp(pmde));
if (!is_migration_entry_young(entry))
@@ -3281,6 +3279,10 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
/* NOTE: this may contain setting soft-dirty on some archs */
if (PageDirty(new) && is_migration_entry_dirty(entry))
pmde = pmd_mkdirty(pmde);
+ if (is_writable_migration_entry(entry))
+ pmde = maybe_pmd_mkwrite(pmde, vma);
+ else
+ pmde = pmd_wrprotect(pmde);
if (PageAnon(new)) {
rmap_t rmap_flags = RMAP_COMPOUND;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index db895230ee7e..bdbfeb6fb393 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -94,6 +94,8 @@ static int hugetlb_acct_memory(struct hstate *h, long delta);
static void hugetlb_vma_lock_free(struct vm_area_struct *vma);
static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma);
static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma);
+static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end);
static inline bool subpool_is_free(struct hugepage_subpool *spool)
{
@@ -1181,7 +1183,7 @@ void hugetlb_dup_vma_private(struct vm_area_struct *vma)
/*
* Reset and decrement one ref on hugepage private reservation.
- * Called with mm->mmap_sem writer semaphore held.
+ * Called with mm->mmap_lock writer semaphore held.
* This function should be only used by move_vma() and operate on
* same sized vma. It should never come here with last ref on the
* reservation.
@@ -4834,6 +4836,25 @@ static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr)
{
if (addr & ~(huge_page_mask(hstate_vma(vma))))
return -EINVAL;
+
+ /*
+ * PMD sharing is only possible for PUD_SIZE-aligned address ranges
+ * in HugeTLB VMAs. If we will lose PUD_SIZE alignment due to this
+ * split, unshare PMDs in the PUD_SIZE interval surrounding addr now.
+ */
+ if (addr & ~PUD_MASK) {
+ /*
+ * hugetlb_vm_op_split is called right before we attempt to
+ * split the VMA. We will need to unshare PMDs in the old and
+ * new VMAs, so let's unshare before we split.
+ */
+ unsigned long floor = addr & PUD_MASK;
+ unsigned long ceil = floor + PUD_SIZE;
+
+ if (floor >= vma->vm_start && ceil <= vma->vm_end)
+ hugetlb_unshare_pmds(vma, floor, ceil);
+ }
+
return 0;
}
@@ -5030,6 +5051,9 @@ again:
entry = huge_pte_clear_uffd_wp(entry);
set_huge_pte_at(dst, addr, dst_pte, entry);
} else if (unlikely(is_pte_marker(entry))) {
+ /* No swap on hugetlb */
+ WARN_ON_ONCE(
+ is_swapin_error_entry(pte_to_swp_entry(entry)));
/*
* We copy the pte marker only if the dst vma has
* uffd-wp enabled.
@@ -5131,7 +5155,7 @@ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr,
/*
* We don't have to worry about the ordering of src and dst ptlocks
- * because exclusive mmap_sem (or the i_mmap_lock) prevents deadlock.
+ * because exclusive mmap_lock (or the i_mmap_lock) prevents deadlock.
*/
if (src_ptl != dst_ptl)
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
@@ -6639,8 +6663,17 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
spinlock_t *ptl;
ptep = huge_pte_offset(mm, address, psize);
if (!ptep) {
- address |= last_addr_mask;
- continue;
+ if (!uffd_wp) {
+ address |= last_addr_mask;
+ continue;
+ }
+ /*
+ * Userfaultfd wr-protect requires pgtable
+ * pre-allocations to install pte markers.
+ */
+ ptep = huge_pte_alloc(mm, vma, address, psize);
+ if (!ptep)
+ break;
}
ptl = huge_pte_lock(h, mm, ptep);
if (huge_pmd_unshare(mm, vma, address, ptep)) {
@@ -6658,16 +6691,13 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
}
pte = huge_ptep_get(ptep);
if (unlikely(is_hugetlb_entry_hwpoisoned(pte))) {
- spin_unlock(ptl);
- continue;
- }
- if (unlikely(is_hugetlb_entry_migration(pte))) {
+ /* Nothing to do. */
+ } else if (unlikely(is_hugetlb_entry_migration(pte))) {
swp_entry_t entry = pte_to_swp_entry(pte);
struct page *page = pfn_swap_entry_to_page(entry);
+ pte_t newpte = pte;
- if (!is_readable_migration_entry(entry)) {
- pte_t newpte;
-
+ if (is_writable_migration_entry(entry)) {
if (PageAnon(page))
entry = make_readable_exclusive_migration_entry(
swp_offset(entry));
@@ -6675,25 +6705,22 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
entry = make_readable_migration_entry(
swp_offset(entry));
newpte = swp_entry_to_pte(entry);
- if (uffd_wp)
- newpte = pte_swp_mkuffd_wp(newpte);
- else if (uffd_wp_resolve)
- newpte = pte_swp_clear_uffd_wp(newpte);
- set_huge_pte_at(mm, address, ptep, newpte);
pages++;
}
- spin_unlock(ptl);
- continue;
- }
- if (unlikely(pte_marker_uffd_wp(pte))) {
- /*
- * This is changing a non-present pte into a none pte,
- * no need for huge_ptep_modify_prot_start/commit().
- */
+
+ if (uffd_wp)
+ newpte = pte_swp_mkuffd_wp(newpte);
+ else if (uffd_wp_resolve)
+ newpte = pte_swp_clear_uffd_wp(newpte);
+ if (!pte_same(pte, newpte))
+ set_huge_pte_at(mm, address, ptep, newpte);
+ } else if (unlikely(is_pte_marker(pte))) {
+ /* No other markers apply for now. */
+ WARN_ON_ONCE(!pte_marker_uffd_wp(pte));
if (uffd_wp_resolve)
+ /* Safe to modify directly (non-present->none). */
huge_pte_clear(mm, address, ptep, psize);
- }
- if (!huge_pte_none(pte)) {
+ } else if (!huge_pte_none(pte)) {
pte_t old_pte;
unsigned int shift = huge_page_shift(hstate_vma(vma));
@@ -7328,26 +7355,21 @@ void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int re
}
}
-/*
- * This function will unconditionally remove all the shared pmd pgtable entries
- * within the specific vma for a hugetlbfs memory range.
- */
-void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
+static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
+ unsigned long start,
+ unsigned long end)
{
struct hstate *h = hstate_vma(vma);
unsigned long sz = huge_page_size(h);
struct mm_struct *mm = vma->vm_mm;
struct mmu_notifier_range range;
- unsigned long address, start, end;
+ unsigned long address;
spinlock_t *ptl;
pte_t *ptep;
if (!(vma->vm_flags & VM_MAYSHARE))
return;
- start = ALIGN(vma->vm_start, PUD_SIZE);
- end = ALIGN_DOWN(vma->vm_end, PUD_SIZE);
-
if (start >= end)
return;
@@ -7379,6 +7401,16 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
mmu_notifier_invalidate_range_end(&range);
}
+/*
+ * This function will unconditionally remove all the shared pmd pgtable entries
+ * within the specific vma for a hugetlbfs memory range.
+ */
+void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
+{
+ hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE),
+ ALIGN_DOWN(vma->vm_end, PUD_SIZE));
+}
+
#ifdef CONFIG_CMA
static bool cma_reserve_called __initdata;
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index 833bf2cfd2a3..21e66d7f261d 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -246,6 +246,9 @@ bool __kasan_slab_free(struct kmem_cache *cache, void *object,
static inline bool ____kasan_kfree_large(void *ptr, unsigned long ip)
{
+ if (!kasan_arch_is_ready())
+ return false;
+
if (ptr != page_address(virt_to_head_page(ptr))) {
kasan_report_invalid_free(ptr, ip, KASAN_REPORT_INVALID_FREE);
return true;
diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c
index b076f597a378..cb762982c8ba 100644
--- a/mm/kasan/generic.c
+++ b/mm/kasan/generic.c
@@ -191,7 +191,12 @@ bool kasan_check_range(unsigned long addr, size_t size, bool write,
bool kasan_byte_accessible(const void *addr)
{
- s8 shadow_byte = READ_ONCE(*(s8 *)kasan_mem_to_shadow(addr));
+ s8 shadow_byte;
+
+ if (!kasan_arch_is_ready())
+ return true;
+
+ shadow_byte = READ_ONCE(*(s8 *)kasan_mem_to_shadow(addr));
return shadow_byte >= 0 && shadow_byte < KASAN_GRANULE_SIZE;
}
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index 1d02757e90a3..22598b20c7b7 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -119,7 +119,7 @@ EXPORT_SYMBOL_GPL(kasan_restore_multi_shot);
* Whether the KASAN KUnit test suite is currently being executed.
* Updated in kasan_test.c.
*/
-bool kasan_kunit_executing;
+static bool kasan_kunit_executing;
void kasan_kunit_test_suite_start(void)
{
diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c
index 2fba1f51f042..15cfb34d16a1 100644
--- a/mm/kasan/shadow.c
+++ b/mm/kasan/shadow.c
@@ -291,6 +291,9 @@ int kasan_populate_vmalloc(unsigned long addr, unsigned long size)
unsigned long shadow_start, shadow_end;
int ret;
+ if (!kasan_arch_is_ready())
+ return 0;
+
if (!is_vmalloc_or_module_addr((void *)addr))
return 0;
@@ -459,6 +462,9 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end,
unsigned long region_start, region_end;
unsigned long size;
+ if (!kasan_arch_is_ready())
+ return;
+
region_start = ALIGN(start, KASAN_MEMORY_PER_SHADOW_PAGE);
region_end = ALIGN_DOWN(end, KASAN_MEMORY_PER_SHADOW_PAGE);
@@ -502,6 +508,9 @@ void *__kasan_unpoison_vmalloc(const void *start, unsigned long size,
* with setting memory tags, so the KASAN_VMALLOC_INIT flag is ignored.
*/
+ if (!kasan_arch_is_ready())
+ return (void *)start;
+
if (!is_vmalloc_or_module_addr(start))
return (void *)start;
@@ -524,6 +533,9 @@ void *__kasan_unpoison_vmalloc(const void *start, unsigned long size,
*/
void __kasan_poison_vmalloc(const void *start, unsigned long size)
{
+ if (!kasan_arch_is_ready())
+ return;
+
if (!is_vmalloc_or_module_addr(start))
return;
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 5cb401aa2b9d..a26a28e3738c 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -847,6 +847,10 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
return SCAN_SUCCEED;
}
+/*
+ * See pmd_trans_unstable() for how the result may change out from
+ * underneath us, even if we hold mmap_lock in read.
+ */
static int find_pmd_or_thp_or_none(struct mm_struct *mm,
unsigned long address,
pmd_t **pmd)
@@ -865,8 +869,12 @@ static int find_pmd_or_thp_or_none(struct mm_struct *mm,
#endif
if (pmd_none(pmde))
return SCAN_PMD_NONE;
+ if (!pmd_present(pmde))
+ return SCAN_PMD_NULL;
if (pmd_trans_huge(pmde))
return SCAN_PMD_MAPPED;
+ if (pmd_devmap(pmde))
+ return SCAN_PMD_NULL;
if (pmd_bad(pmde))
return SCAN_PMD_NULL;
return SCAN_SUCCEED;
@@ -1460,14 +1468,6 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
if (!hugepage_vma_check(vma, vma->vm_flags, false, false, false))
return SCAN_VMA_CHECK;
- /*
- * Symmetry with retract_page_tables(): Exclude MAP_PRIVATE mappings
- * that got written to. Without this, we'd have to also lock the
- * anon_vma if one exists.
- */
- if (vma->anon_vma)
- return SCAN_VMA_CHECK;
-
/* Keep pmd pgtable for uffd-wp; see comment in retract_page_tables() */
if (userfaultfd_wp(vma))
return SCAN_PTE_UFFD_WP;
@@ -1567,8 +1567,14 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
}
/* step 4: remove pte entries */
+ /* we make no change to anon, but protect concurrent anon page lookup */
+ if (vma->anon_vma)
+ anon_vma_lock_write(vma->anon_vma);
+
collapse_and_free_pmd(mm, vma, haddr, pmd);
+ if (vma->anon_vma)
+ anon_vma_unlock_write(vma->anon_vma);
i_mmap_unlock_write(vma->vm_file->f_mapping);
maybe_install_pmd:
@@ -1644,7 +1650,7 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff,
* has higher cost too. It would also probably require locking
* the anon_vma.
*/
- if (vma->anon_vma) {
+ if (READ_ONCE(vma->anon_vma)) {
result = SCAN_PAGE_ANON;
goto next;
}
@@ -1673,6 +1679,18 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff,
if ((cc->is_khugepaged || is_target) &&
mmap_write_trylock(mm)) {
/*
+ * Re-check whether we have an ->anon_vma, because
+ * collapse_and_free_pmd() requires that either no
+ * ->anon_vma exists or the anon_vma is locked.
+ * We already checked ->anon_vma above, but that check
+ * is racy because ->anon_vma can be populated under the
+ * mmap lock in read mode.
+ */
+ if (vma->anon_vma) {
+ result = SCAN_PAGE_ANON;
+ goto unlock_next;
+ }
+ /*
* When a vma is registered with uffd-wp, we can't
* recycle the pmd pgtable because there can be pte
* markers installed. Skip it only, so the rest mm/vma
@@ -2593,6 +2611,7 @@ static int madvise_collapse_errno(enum scan_result r)
case SCAN_CGROUP_CHARGE_FAIL:
return -EBUSY;
/* Resource temporary unavailable - trying again might succeed */
+ case SCAN_PAGE_COUNT:
case SCAN_PAGE_LOCK:
case SCAN_PAGE_LRU:
case SCAN_DEL_PAGE_LRU:
@@ -2649,7 +2668,7 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev,
goto out_nolock;
}
- hend = vma->vm_end & HPAGE_PMD_MASK;
+ hend = min(hend, vma->vm_end & HPAGE_PMD_MASK);
}
mmap_assert_locked(mm);
memset(cc->node_load, 0, sizeof(cc->node_load));
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 92f670edbf51..55dc8b8b0616 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -2070,8 +2070,10 @@ static int __init kmemleak_boot_config(char *str)
return -EINVAL;
if (strcmp(str, "off") == 0)
kmemleak_disable();
- else if (strcmp(str, "on") == 0)
+ else if (strcmp(str, "on") == 0) {
kmemleak_skip_disable = 1;
+ stack_depot_want_early_init();
+ }
else
return -EINVAL;
return 0;
@@ -2093,7 +2095,6 @@ void __init kmemleak_init(void)
if (kmemleak_error)
return;
- stack_depot_init();
jiffies_min_age = msecs_to_jiffies(MSECS_MIN_AGE);
jiffies_scan_wait = msecs_to_jiffies(SECS_SCAN_WAIT * 1000);
diff --git a/mm/ksm.c b/mm/ksm.c
index dd02780c387f..addf490da146 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2629,8 +2629,11 @@ struct page *ksm_might_need_to_copy(struct page *page,
new_page = NULL;
}
if (new_page) {
- copy_user_highpage(new_page, page, address, vma);
-
+ if (copy_mc_user_highpage(new_page, page, address, vma)) {
+ put_page(new_page);
+ memory_failure_queue(page_to_pfn(page), 0);
+ return ERR_PTR(-EHWPOISON);
+ }
SetPageDirty(new_page);
__SetPageUptodate(new_page);
__SetPageLocked(new_page);
diff --git a/mm/madvise.c b/mm/madvise.c
index a56a6d17e201..18c2e2affac4 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -130,7 +130,7 @@ static int replace_anon_vma_name(struct vm_area_struct *vma,
#endif /* CONFIG_ANON_VMA_NAME */
/*
* Update the vm_flags on region of a vma, splitting it or merging it as
- * necessary. Must be called with mmap_sem held for writing;
+ * necessary. Must be called with mmap_lock held for writing;
* Caller should ensure anon_name stability by raising its refcount even when
* anon_name belongs to a valid vma because this function might free that vma.
*/
@@ -329,7 +329,7 @@ static inline bool can_do_file_pageout(struct vm_area_struct *vma)
* otherwise we'd be including shared non-exclusive mappings, which
* opens a side channel.
*/
- return inode_owner_or_capable(&init_user_ns,
+ return inode_owner_or_capable(&nop_mnt_idmap,
file_inode(vma->vm_file)) ||
file_permission(vma->vm_file, MAY_WRITE) == 0;
}
diff --git a/mm/memblock.c b/mm/memblock.c
index 511d4783dcf1..d036c7861310 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -836,7 +836,7 @@ void __init_memblock memblock_free(void *ptr, size_t size)
* @base: phys starting address of the boot memory block
* @size: size of the boot memory block in bytes
*
- * Free boot memory block previously allocated by memblock_alloc_xx() API.
+ * Free boot memory block previously allocated by memblock_phys_alloc_xx() API.
* The freeing memory will not be released to the buddy allocator.
*/
int __init_memblock memblock_phys_free(phys_addr_t base, phys_addr_t size)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ab457f0394ab..73afff8062f9 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -63,7 +63,6 @@
#include <linux/resume_user_mode.h>
#include <linux/psi.h>
#include <linux/seq_buf.h>
-#include <linux/parser.h>
#include "internal.h"
#include <net/sock.h>
#include <net/ip.h>
@@ -2393,8 +2392,7 @@ static unsigned long reclaim_high(struct mem_cgroup *memcg,
psi_memstall_enter(&pflags);
nr_reclaimed += try_to_free_mem_cgroup_pages(memcg, nr_pages,
gfp_mask,
- MEMCG_RECLAIM_MAY_SWAP,
- NULL);
+ MEMCG_RECLAIM_MAY_SWAP);
psi_memstall_leave(&pflags);
} while ((memcg = parent_mem_cgroup(memcg)) &&
!mem_cgroup_is_root(memcg));
@@ -2685,8 +2683,7 @@ retry:
psi_memstall_enter(&pflags);
nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
- gfp_mask, reclaim_options,
- NULL);
+ gfp_mask, reclaim_options);
psi_memstall_leave(&pflags);
if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
@@ -3506,8 +3503,7 @@ static int mem_cgroup_resize_max(struct mem_cgroup *memcg,
}
if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL,
- memsw ? 0 : MEMCG_RECLAIM_MAY_SWAP,
- NULL)) {
+ memsw ? 0 : MEMCG_RECLAIM_MAY_SWAP)) {
ret = -EBUSY;
break;
}
@@ -3618,8 +3614,7 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
return -EINTR;
if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL,
- MEMCG_RECLAIM_MAY_SWAP,
- NULL))
+ MEMCG_RECLAIM_MAY_SWAP))
nr_retries--;
}
@@ -6429,8 +6424,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
}
reclaimed = try_to_free_mem_cgroup_pages(memcg, nr_pages - high,
- GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP,
- NULL);
+ GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP);
if (!reclaimed && !nr_retries--)
break;
@@ -6479,8 +6473,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
if (nr_reclaims) {
if (!try_to_free_mem_cgroup_pages(memcg, nr_pages - max,
- GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP,
- NULL))
+ GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP))
nr_reclaims--;
continue;
}
@@ -6603,54 +6596,21 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
return nbytes;
}
-enum {
- MEMORY_RECLAIM_NODES = 0,
- MEMORY_RECLAIM_NULL,
-};
-
-static const match_table_t if_tokens = {
- { MEMORY_RECLAIM_NODES, "nodes=%s" },
- { MEMORY_RECLAIM_NULL, NULL },
-};
-
static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
unsigned int nr_retries = MAX_RECLAIM_RETRIES;
unsigned long nr_to_reclaim, nr_reclaimed = 0;
- unsigned int reclaim_options = MEMCG_RECLAIM_MAY_SWAP |
- MEMCG_RECLAIM_PROACTIVE;
- char *old_buf, *start;
- substring_t args[MAX_OPT_ARGS];
- int token;
- char value[256];
- nodemask_t nodemask = NODE_MASK_ALL;
-
- buf = strstrip(buf);
-
- old_buf = buf;
- nr_to_reclaim = memparse(buf, &buf) / PAGE_SIZE;
- if (buf == old_buf)
- return -EINVAL;
+ unsigned int reclaim_options;
+ int err;
buf = strstrip(buf);
+ err = page_counter_memparse(buf, "", &nr_to_reclaim);
+ if (err)
+ return err;
- while ((start = strsep(&buf, " ")) != NULL) {
- if (!strlen(start))
- continue;
- token = match_token(start, if_tokens, args);
- match_strlcpy(value, args, sizeof(value));
- switch (token) {
- case MEMORY_RECLAIM_NODES:
- if (nodelist_parse(value, nodemask) < 0)
- return -EINVAL;
- break;
- default:
- return -EINVAL;
- }
- }
-
+ reclaim_options = MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE;
while (nr_reclaimed < nr_to_reclaim) {
unsigned long reclaimed;
@@ -6667,8 +6627,7 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
reclaimed = try_to_free_mem_cgroup_pages(memcg,
nr_to_reclaim - nr_reclaimed,
- GFP_KERNEL, reclaim_options,
- &nodemask);
+ GFP_KERNEL, reclaim_options);
if (!reclaimed && !nr_retries--)
return -EAGAIN;
diff --git a/mm/memory.c b/mm/memory.c
index aad226daf41b..f526b9152bef 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -828,12 +828,8 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
return -EBUSY;
return -ENOENT;
} else if (is_pte_marker_entry(entry)) {
- /*
- * We're copying the pgtable should only because dst_vma has
- * uffd-wp enabled, do sanity check.
- */
- WARN_ON_ONCE(!userfaultfd_wp(dst_vma));
- set_pte_at(dst_mm, addr, dst_pte, pte);
+ if (is_swapin_error_entry(entry) || userfaultfd_wp(dst_vma))
+ set_pte_at(dst_mm, addr, dst_pte, pte);
return 0;
}
if (!userfaultfd_wp(dst_vma))
@@ -3629,8 +3625,12 @@ static vm_fault_t pte_marker_clear(struct vm_fault *vmf)
/*
* Be careful so that we will only recover a special uffd-wp pte into a
* none pte. Otherwise it means the pte could have changed, so retry.
+ *
+ * This should also cover the case where e.g. the pte changed
+ * quickly from a PTE_MARKER_UFFD_WP into PTE_MARKER_SWAPIN_ERROR.
+ * So is_pte_marker() check is not enough to safely drop the pte.
*/
- if (is_pte_marker(*vmf->pte))
+ if (pte_same(vmf->orig_pte, *vmf->pte))
pte_clear(vmf->vma->vm_mm, vmf->address, vmf->pte);
pte_unmap_unlock(vmf->pte, vmf->ptl);
return 0;
@@ -3840,6 +3840,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
if (unlikely(!page)) {
ret = VM_FAULT_OOM;
goto out_page;
+ } else if (unlikely(PTR_ERR(page) == -EHWPOISON)) {
+ ret = VM_FAULT_HWPOISON;
+ goto out_page;
}
folio = page_folio(page);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 02c8a712282f..f940395667c8 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -600,7 +600,8 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
/* With MPOL_MF_MOVE, we migrate only unshared hugepage. */
if (flags & (MPOL_MF_MOVE_ALL) ||
- (flags & MPOL_MF_MOVE && page_mapcount(page) == 1)) {
+ (flags & MPOL_MF_MOVE && page_mapcount(page) == 1 &&
+ !hugetlb_pmd_shared(pte))) {
if (isolate_hugetlb(page, qp->pagelist) &&
(flags & MPOL_MF_STRICT))
/*
diff --git a/mm/migrate.c b/mm/migrate.c
index a4d3fc65085f..cc5455614e01 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -224,6 +224,8 @@ static bool remove_migration_pte(struct folio *folio,
pte = maybe_mkwrite(pte, vma);
else if (pte_swp_uffd_wp(*pvmw.pte))
pte = pte_mkuffd_wp(pte);
+ else
+ pte = pte_wrprotect(pte);
if (folio_test_anon(folio) && !is_readable_migration_entry(entry))
rmap_flags |= RMAP_EXCLUSIVE;
diff --git a/mm/mincore.c b/mm/mincore.c
index a085a2aeabd8..cd69b9db0081 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -168,7 +168,7 @@ static inline bool can_do_mincore(struct vm_area_struct *vma)
* for writing; otherwise we'd be including shared non-exclusive
* mappings, which opens a side channel.
*/
- return inode_owner_or_capable(&init_user_ns,
+ return inode_owner_or_capable(&nop_mnt_idmap,
file_inode(vma->vm_file)) ||
file_permission(vma->vm_file, MAY_WRITE) == 0;
}
diff --git a/mm/mmap.c b/mm/mmap.c
index 87d929316d57..425a9349e610 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1524,6 +1524,10 @@ int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
if (vma_soft_dirty_enabled(vma) && !is_vm_hugetlb_page(vma))
return 1;
+ /* Do we need write faults for uffd-wp tracking? */
+ if (userfaultfd_wp(vma))
+ return 1;
+
/* Specialty mapping? */
if (vm_flags & VM_PFNMAP)
return 0;
@@ -2290,7 +2294,7 @@ static inline int munmap_sidetree(struct vm_area_struct *vma,
* @start: The aligned start address to munmap.
* @end: The aligned end address to munmap.
* @uf: The userfaultfd list_head
- * @downgrade: Set to true to attempt a write downgrade of the mmap_sem
+ * @downgrade: Set to true to attempt a write downgrade of the mmap_lock
*
* If @downgrade is true, check return code for potential release of the lock.
*/
@@ -2465,7 +2469,7 @@ map_count_exceeded:
* @len: The length of the range to munmap
* @uf: The userfaultfd list_head
* @downgrade: set to true if the user wants to attempt to write_downgrade the
- * mmap_sem
+ * mmap_lock
*
* This function takes a @mas that is either pointing to the previous VMA or set
* to MA_START and sets it up to remove the mapping(s). The @len will be
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 908df12caa26..61cf60015a8b 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -245,7 +245,13 @@ static unsigned long change_pte_range(struct mmu_gather *tlb,
newpte = pte_swp_mksoft_dirty(newpte);
if (pte_swp_uffd_wp(oldpte))
newpte = pte_swp_mkuffd_wp(newpte);
- } else if (pte_marker_entry_uffd_wp(entry)) {
+ } else if (is_pte_marker_entry(entry)) {
+ /*
+ * Ignore swapin errors unconditionally,
+ * because any access should sigbus anyway.
+ */
+ if (is_swapin_error_entry(entry))
+ continue;
/*
* If this is uffd-wp pte marker and we'd like
* to unprotect it, drop it; the next page
diff --git a/mm/mremap.c b/mm/mremap.c
index fe587c5d6591..930f65c315c0 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -1027,16 +1027,29 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
}
/*
- * Function vma_merge() is called on the extension we are adding to
- * the already existing vma, vma_merge() will merge this extension with
- * the already existing vma (expand operation itself) and possibly also
- * with the next vma if it becomes adjacent to the expanded vma and
- * otherwise compatible.
+ * Function vma_merge() is called on the extension we
+ * are adding to the already existing vma, vma_merge()
+ * will merge this extension with the already existing
+ * vma (expand operation itself) and possibly also with
+ * the next vma if it becomes adjacent to the expanded
+ * vma and otherwise compatible.
+ *
+ * However, vma_merge() can currently fail due to
+ * is_mergeable_vma() check for vm_ops->close (see the
+ * comment there). Yet this should not prevent vma
+ * expanding, so perform a simple expand for such vma.
+ * Ideally the check for close op should be only done
+ * when a vma would be actually removed due to a merge.
*/
- vma = vma_merge(mm, vma, extension_start, extension_end,
+ if (!vma->vm_ops || !vma->vm_ops->close) {
+ vma = vma_merge(mm, vma, extension_start, extension_end,
vma->vm_flags, vma->anon_vma, vma->vm_file,
extension_pgoff, vma_policy(vma),
vma->vm_userfaultfd_ctx, anon_vma_name(vma));
+ } else if (vma_adjust(vma, vma->vm_start, addr + new_len,
+ vma->vm_pgoff, NULL)) {
+ vma = NULL;
+ }
if (!vma) {
vm_unacct_memory(pages);
ret = -ENOMEM;
diff --git a/mm/nommu.c b/mm/nommu.c
index 214c70e1d059..5b83938ecb67 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -559,7 +559,6 @@ void vma_mas_remove(struct vm_area_struct *vma, struct ma_state *mas)
static void setup_vma_to_mm(struct vm_area_struct *vma, struct mm_struct *mm)
{
- mm->map_count++;
vma->vm_mm = mm;
/* add the VMA to the mapping */
@@ -587,6 +586,7 @@ static void mas_add_vma_to_mm(struct ma_state *mas, struct mm_struct *mm,
BUG_ON(!vma->vm_region);
setup_vma_to_mm(vma, mm);
+ mm->map_count++;
/* add the VMA to the tree */
vma_mas_store(vma, mas);
@@ -1240,6 +1240,7 @@ share:
error_just_free:
up_write(&nommu_region_sem);
error:
+ mas_destroy(&mas);
if (region->vm_file)
fput(region->vm_file);
kmem_cache_free(vm_region_jar, region);
@@ -1250,7 +1251,6 @@ error:
sharing_violation:
up_write(&nommu_region_sem);
- mas_destroy(&mas);
pr_warn("Attempt to share mismatched mappings\n");
ret = -EINVAL;
goto error;
@@ -1347,6 +1347,7 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
if (vma->vm_file)
return -ENOMEM;
+ mm = vma->vm_mm;
if (mm->map_count >= sysctl_max_map_count)
return -ENOMEM;
@@ -1398,6 +1399,7 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
mas_set_range(&mas, vma->vm_start, vma->vm_end - 1);
mas_store(&mas, vma);
vma_mas_store(new, &mas);
+ mm->map_count++;
return 0;
err_mas_preallocate:
@@ -1509,7 +1511,8 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list
erase_whole_vma:
if (delete_vma_from_mm(vma))
ret = -ENOMEM;
- delete_vma(mm, vma);
+ else
+ delete_vma(mm, vma);
return ret;
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0745aedebb37..3bb3484563ed 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5631,9 +5631,12 @@ EXPORT_SYMBOL(get_zeroed_page);
*/
void __free_pages(struct page *page, unsigned int order)
{
+ /* get PageHead before we drop reference */
+ int head = PageHead(page);
+
if (put_page_testzero(page))
free_the_page(page, order);
- else if (!PageHead(page))
+ else if (!head)
while (order-- > 0)
free_the_page(page + (1 << order), order);
}
diff --git a/mm/secretmem.c b/mm/secretmem.c
index 04c3ac9448a1..afcf46e99cda 100644
--- a/mm/secretmem.c
+++ b/mm/secretmem.c
@@ -162,7 +162,7 @@ const struct address_space_operations secretmem_aops = {
.migrate_folio = secretmem_migrate_folio,
};
-static int secretmem_setattr(struct user_namespace *mnt_userns,
+static int secretmem_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *iattr)
{
struct inode *inode = d_inode(dentry);
@@ -175,7 +175,7 @@ static int secretmem_setattr(struct user_namespace *mnt_userns,
if ((ia_valid & ATTR_SIZE) && inode->i_size)
ret = -EINVAL;
else
- ret = simple_setattr(mnt_userns, dentry, iattr);
+ ret = simple_setattr(idmap, dentry, iattr);
filemap_invalidate_unlock(mapping);
diff --git a/mm/shmem.c b/mm/shmem.c
index c301487be5fb..41f82c5a5e28 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -478,12 +478,10 @@ bool shmem_is_huge(struct vm_area_struct *vma, struct inode *inode,
if (vma && ((vma->vm_flags & VM_NOHUGEPAGE) ||
test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags)))
return false;
- if (shmem_huge_force)
- return true;
- if (shmem_huge == SHMEM_HUGE_FORCE)
- return true;
if (shmem_huge == SHMEM_HUGE_DENY)
return false;
+ if (shmem_huge_force || shmem_huge == SHMEM_HUGE_FORCE)
+ return true;
switch (SHMEM_SB(inode->i_sb)->huge) {
case SHMEM_HUGE_ALWAYS:
@@ -1047,7 +1045,7 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
}
EXPORT_SYMBOL_GPL(shmem_truncate_range);
-static int shmem_getattr(struct user_namespace *mnt_userns,
+static int shmem_getattr(struct mnt_idmap *idmap,
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
@@ -1068,7 +1066,7 @@ static int shmem_getattr(struct user_namespace *mnt_userns,
stat->attributes_mask |= (STATX_ATTR_APPEND |
STATX_ATTR_IMMUTABLE |
STATX_ATTR_NODUMP);
- generic_fillattr(&init_user_ns, inode, stat);
+ generic_fillattr(idmap, inode, stat);
if (shmem_is_huge(NULL, inode, 0, false))
stat->blksize = HPAGE_PMD_SIZE;
@@ -1082,7 +1080,7 @@ static int shmem_getattr(struct user_namespace *mnt_userns,
return 0;
}
-static int shmem_setattr(struct user_namespace *mnt_userns,
+static int shmem_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
@@ -1091,7 +1089,7 @@ static int shmem_setattr(struct user_namespace *mnt_userns,
bool update_mtime = false;
bool update_ctime = true;
- error = setattr_prepare(&init_user_ns, dentry, attr);
+ error = setattr_prepare(idmap, dentry, attr);
if (error)
return error;
@@ -1129,9 +1127,9 @@ static int shmem_setattr(struct user_namespace *mnt_userns,
}
}
- setattr_copy(&init_user_ns, inode, attr);
+ setattr_copy(idmap, inode, attr);
if (attr->ia_valid & ATTR_MODE)
- error = posix_acl_chmod(&init_user_ns, dentry, inode->i_mode);
+ error = posix_acl_chmod(idmap, dentry, inode->i_mode);
if (!error && update_ctime) {
inode->i_ctime = current_time(inode);
if (update_mtime)
@@ -2329,8 +2327,9 @@ static void shmem_set_inode_flags(struct inode *inode, unsigned int fsflags)
#define shmem_initxattrs NULL
#endif
-static struct inode *shmem_get_inode(struct super_block *sb, struct inode *dir,
- umode_t mode, dev_t dev, unsigned long flags)
+static struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block *sb,
+ struct inode *dir, umode_t mode, dev_t dev,
+ unsigned long flags)
{
struct inode *inode;
struct shmem_inode_info *info;
@@ -2343,7 +2342,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, struct inode *dir,
inode = new_inode(sb);
if (inode) {
inode->i_ino = ino;
- inode_init_owner(&init_user_ns, inode, dir, mode);
+ inode_init_owner(idmap, inode, dir, mode);
inode->i_blocks = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
inode->i_generation = get_random_u32();
@@ -2915,13 +2914,13 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
* File creation. Allocate an inode, and we're done..
*/
static int
-shmem_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t dev)
{
struct inode *inode;
int error = -ENOSPC;
- inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE);
+ inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, dev, VM_NORESERVE);
if (inode) {
error = simple_acl_create(dir, inode);
if (error)
@@ -2946,13 +2945,13 @@ out_iput:
}
static int
-shmem_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+shmem_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
struct file *file, umode_t mode)
{
struct inode *inode;
int error = -ENOSPC;
- inode = shmem_get_inode(dir->i_sb, dir, mode, 0, VM_NORESERVE);
+ inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, 0, VM_NORESERVE);
if (inode) {
error = security_inode_init_security(inode, dir,
NULL,
@@ -2970,22 +2969,22 @@ out_iput:
return error;
}
-static int shmem_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int shmem_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
int error;
- if ((error = shmem_mknod(&init_user_ns, dir, dentry,
- mode | S_IFDIR, 0)))
+ error = shmem_mknod(idmap, dir, dentry, mode | S_IFDIR, 0);
+ if (error)
return error;
inc_nlink(dir);
return 0;
}
-static int shmem_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int shmem_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
- return shmem_mknod(&init_user_ns, dir, dentry, mode | S_IFREG, 0);
+ return shmem_mknod(idmap, dir, dentry, mode | S_IFREG, 0);
}
/*
@@ -3045,7 +3044,7 @@ static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
return shmem_unlink(dir, dentry);
}
-static int shmem_whiteout(struct user_namespace *mnt_userns,
+static int shmem_whiteout(struct mnt_idmap *idmap,
struct inode *old_dir, struct dentry *old_dentry)
{
struct dentry *whiteout;
@@ -3055,7 +3054,7 @@ static int shmem_whiteout(struct user_namespace *mnt_userns,
if (!whiteout)
return -ENOMEM;
- error = shmem_mknod(&init_user_ns, old_dir, whiteout,
+ error = shmem_mknod(idmap, old_dir, whiteout,
S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
dput(whiteout);
if (error)
@@ -3078,7 +3077,7 @@ static int shmem_whiteout(struct user_namespace *mnt_userns,
* it exists so that the VFS layer correctly free's it when it
* gets overwritten.
*/
-static int shmem_rename2(struct user_namespace *mnt_userns,
+static int shmem_rename2(struct mnt_idmap *idmap,
struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
@@ -3098,7 +3097,7 @@ static int shmem_rename2(struct user_namespace *mnt_userns,
if (flags & RENAME_WHITEOUT) {
int error;
- error = shmem_whiteout(&init_user_ns, old_dir, old_dentry);
+ error = shmem_whiteout(idmap, old_dir, old_dentry);
if (error)
return error;
}
@@ -3124,7 +3123,7 @@ static int shmem_rename2(struct user_namespace *mnt_userns,
return 0;
}
-static int shmem_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *symname)
{
int error;
@@ -3136,7 +3135,7 @@ static int shmem_symlink(struct user_namespace *mnt_userns, struct inode *dir,
if (len > PAGE_SIZE)
return -ENAMETOOLONG;
- inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK | 0777, 0,
+ inode = shmem_get_inode(idmap, dir->i_sb, dir, S_IFLNK | 0777, 0,
VM_NORESERVE);
if (!inode)
return -ENOSPC;
@@ -3229,7 +3228,7 @@ static int shmem_fileattr_get(struct dentry *dentry, struct fileattr *fa)
return 0;
}
-static int shmem_fileattr_set(struct user_namespace *mnt_userns,
+static int shmem_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa)
{
struct inode *inode = d_inode(dentry);
@@ -3303,7 +3302,7 @@ static int shmem_xattr_handler_get(const struct xattr_handler *handler,
}
static int shmem_xattr_handler_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
@@ -3819,7 +3818,8 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
#endif
uuid_gen(&sb->s_uuid);
- inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE);
+ inode = shmem_get_inode(&nop_mnt_idmap, sb, NULL, S_IFDIR | sbinfo->mode, 0,
+ VM_NORESERVE);
if (!inode)
goto failed;
inode->i_uid = sbinfo->uid;
@@ -4044,7 +4044,11 @@ static struct file_system_type shmem_fs_type = {
.parameters = shmem_fs_parameters,
#endif
.kill_sb = kill_litter_super,
+#ifdef CONFIG_SHMEM
+ .fs_flags = FS_USERNS_MOUNT | FS_ALLOW_IDMAP,
+#else
.fs_flags = FS_USERNS_MOUNT,
+#endif
};
void __init shmem_init(void)
@@ -4196,7 +4200,7 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range);
#define shmem_vm_ops generic_file_vm_ops
#define shmem_anon_vm_ops generic_file_vm_ops
#define shmem_file_operations ramfs_file_operations
-#define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev)
+#define shmem_get_inode(idmap, sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev)
#define shmem_acct_size(flags, size) 0
#define shmem_unacct_size(flags, size) do {} while (0)
@@ -4219,8 +4223,11 @@ static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, l
if (shmem_acct_size(flags, size))
return ERR_PTR(-ENOMEM);
- inode = shmem_get_inode(mnt->mnt_sb, NULL, S_IFREG | S_IRWXUGO, 0,
- flags);
+ if (is_idmapped_mnt(mnt))
+ return ERR_PTR(-EINVAL);
+
+ inode = shmem_get_inode(&nop_mnt_idmap, mnt->mnt_sb, NULL,
+ S_IFREG | S_IRWXUGO, 0, flags);
if (unlikely(!inode)) {
shmem_unacct_size(flags, size);
return ERR_PTR(-ENOSPC);
diff --git a/mm/shrinker_debug.c b/mm/shrinker_debug.c
index b05295bab322..39c3491e28a3 100644
--- a/mm/shrinker_debug.c
+++ b/mm/shrinker_debug.c
@@ -246,18 +246,21 @@ int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...)
}
EXPORT_SYMBOL(shrinker_debugfs_rename);
-void shrinker_debugfs_remove(struct shrinker *shrinker)
+struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker)
{
+ struct dentry *entry = shrinker->debugfs_entry;
+
lockdep_assert_held(&shrinker_rwsem);
kfree_const(shrinker->name);
shrinker->name = NULL;
- if (!shrinker->debugfs_entry)
- return;
+ if (entry) {
+ ida_free(&shrinker_debugfs_ida, shrinker->debugfs_id);
+ shrinker->debugfs_entry = NULL;
+ }
- debugfs_remove_recursive(shrinker->debugfs_entry);
- ida_free(&shrinker_debugfs_ida, shrinker->debugfs_id);
+ return entry;
}
static int __init shrinker_debugfs_init(void)
diff --git a/mm/slab.c b/mm/slab.c
index 7a269db050ee..29300fc1289a 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2211,6 +2211,8 @@ static int drain_freelist(struct kmem_cache *cache,
raw_spin_unlock_irq(&n->list_lock);
slab_destroy(cache, slab);
nr_freed++;
+
+ cond_resched();
}
out:
return nr_freed;
diff --git a/mm/swap.c b/mm/swap.c
index 70e2063ef43a..4c03ecab698e 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -158,36 +158,6 @@ void put_pages_list(struct list_head *pages)
}
EXPORT_SYMBOL(put_pages_list);
-/*
- * get_kernel_pages() - pin kernel pages in memory
- * @kiov: An array of struct kvec structures
- * @nr_segs: number of segments to pin
- * @write: pinning for read/write, currently ignored
- * @pages: array that receives pointers to the pages pinned.
- * Should be at least nr_segs long.
- *
- * Returns number of pages pinned. This may be fewer than the number requested.
- * If nr_segs is 0 or negative, returns 0. If no pages were pinned, returns 0.
- * Each page returned must be released with a put_page() call when it is
- * finished with.
- */
-int get_kernel_pages(const struct kvec *kiov, int nr_segs, int write,
- struct page **pages)
-{
- int seg;
-
- for (seg = 0; seg < nr_segs; seg++) {
- if (WARN_ON(kiov[seg].iov_len != PAGE_SIZE))
- return seg;
-
- pages[seg] = kmap_to_page(kiov[seg].iov_base);
- get_page(pages[seg]);
- }
-
- return seg;
-}
-EXPORT_SYMBOL_GPL(get_kernel_pages);
-
typedef void (*move_fn_t)(struct lruvec *lruvec, struct folio *folio);
static void lru_add_fn(struct lruvec *lruvec, struct folio *folio)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 908a529bca12..eb9b0bf1fcdd 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1100,6 +1100,7 @@ start_over:
goto check_out;
pr_debug("scan_swap_map of si %d failed to find offset\n",
si->type);
+ cond_resched();
spin_lock(&swap_avail_lock);
nextsi:
@@ -1763,12 +1764,15 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
struct page *swapcache;
spinlock_t *ptl;
pte_t *pte, new_pte;
+ bool hwposioned = false;
int ret = 1;
swapcache = page;
page = ksm_might_need_to_copy(page, vma, addr);
if (unlikely(!page))
return -ENOMEM;
+ else if (unlikely(PTR_ERR(page) == -EHWPOISON))
+ hwposioned = true;
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
if (unlikely(!pte_same_as_swp(*pte, swp_entry_to_pte(entry)))) {
@@ -1776,15 +1780,19 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
goto out;
}
- if (unlikely(!PageUptodate(page))) {
- pte_t pteval;
+ if (unlikely(hwposioned || !PageUptodate(page))) {
+ swp_entry_t swp_entry;
dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
- pteval = swp_entry_to_pte(make_swapin_error_entry());
- set_pte_at(vma->vm_mm, addr, pte, pteval);
- swap_free(entry);
+ if (hwposioned) {
+ swp_entry = make_hwpoison_entry(swapcache);
+ page = swapcache;
+ } else {
+ swp_entry = make_swapin_error_entry();
+ }
+ new_pte = swp_entry_to_pte(swp_entry);
ret = 0;
- goto out;
+ goto setpte;
}
/* See do_swap_page() */
@@ -1816,6 +1824,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
new_pte = pte_mksoft_dirty(new_pte);
if (pte_swp_uffd_wp(*pte))
new_pte = pte_mkuffd_wp(new_pte);
+setpte:
set_pte_at(vma->vm_mm, addr, pte, new_pte);
swap_free(entry);
out:
diff --git a/mm/vmscan.c b/mm/vmscan.c
index bd6637fcd8f9..5b7b8d4f5297 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -741,6 +741,8 @@ EXPORT_SYMBOL(register_shrinker);
*/
void unregister_shrinker(struct shrinker *shrinker)
{
+ struct dentry *debugfs_entry;
+
if (!(shrinker->flags & SHRINKER_REGISTERED))
return;
@@ -749,9 +751,11 @@ void unregister_shrinker(struct shrinker *shrinker)
shrinker->flags &= ~SHRINKER_REGISTERED;
if (shrinker->flags & SHRINKER_MEMCG_AWARE)
unregister_memcg_shrinker(shrinker);
- shrinker_debugfs_remove(shrinker);
+ debugfs_entry = shrinker_debugfs_remove(shrinker);
up_write(&shrinker_rwsem);
+ debugfs_remove_recursive(debugfs_entry);
+
kfree(shrinker->nr_deferred);
shrinker->nr_deferred = NULL;
}
@@ -3323,13 +3327,16 @@ void lru_gen_migrate_mm(struct mm_struct *mm)
if (mem_cgroup_disabled())
return;
+ /* migration can happen before addition */
+ if (!mm->lru_gen.memcg)
+ return;
+
rcu_read_lock();
memcg = mem_cgroup_from_task(task);
rcu_read_unlock();
if (memcg == mm->lru_gen.memcg)
return;
- VM_WARN_ON_ONCE(!mm->lru_gen.memcg);
VM_WARN_ON_ONCE(list_empty(&mm->lru_gen.list));
lru_gen_del_mm(mm);
@@ -6754,8 +6761,7 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_pages,
gfp_t gfp_mask,
- unsigned int reclaim_options,
- nodemask_t *nodemask)
+ unsigned int reclaim_options)
{
unsigned long nr_reclaimed;
unsigned int noreclaim_flag;
@@ -6770,7 +6776,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
.may_unmap = 1,
.may_swap = !!(reclaim_options & MEMCG_RECLAIM_MAY_SWAP),
.proactive = !!(reclaim_options & MEMCG_RECLAIM_PROACTIVE),
- .nodemask = nodemask,
};
/*
* Traverse the ZONELIST_FALLBACK zonelist of the current node to put
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 9445bee6b014..702bc3fd687a 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -113,7 +113,23 @@
* have room for two bit at least.
*/
#define OBJ_ALLOCATED_TAG 1
-#define OBJ_TAG_BITS 1
+
+#ifdef CONFIG_ZPOOL
+/*
+ * The second least-significant bit in the object's header identifies if the
+ * value stored at the header is a deferred handle from the last reclaim
+ * attempt.
+ *
+ * As noted above, this is valid because we have room for two bits.
+ */
+#define OBJ_DEFERRED_HANDLE_TAG 2
+#define OBJ_TAG_BITS 2
+#define OBJ_TAG_MASK (OBJ_ALLOCATED_TAG | OBJ_DEFERRED_HANDLE_TAG)
+#else
+#define OBJ_TAG_BITS 1
+#define OBJ_TAG_MASK OBJ_ALLOCATED_TAG
+#endif /* CONFIG_ZPOOL */
+
#define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS - OBJ_TAG_BITS)
#define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
@@ -222,6 +238,12 @@ struct link_free {
* Handle of allocated object.
*/
unsigned long handle;
+#ifdef CONFIG_ZPOOL
+ /*
+ * Deferred handle of a reclaimed object.
+ */
+ unsigned long deferred_handle;
+#endif
};
};
@@ -272,8 +294,6 @@ struct zspage {
/* links the zspage to the lru list in the pool */
struct list_head lru;
bool under_reclaim;
- /* list of unfreed handles whose objects have been reclaimed */
- unsigned long *deferred_handles;
#endif
struct zs_pool *pool;
@@ -897,7 +917,8 @@ static unsigned long handle_to_obj(unsigned long handle)
return *(unsigned long *)handle;
}
-static bool obj_allocated(struct page *page, void *obj, unsigned long *phandle)
+static bool obj_tagged(struct page *page, void *obj, unsigned long *phandle,
+ int tag)
{
unsigned long handle;
struct zspage *zspage = get_zspage(page);
@@ -908,13 +929,27 @@ static bool obj_allocated(struct page *page, void *obj, unsigned long *phandle)
} else
handle = *(unsigned long *)obj;
- if (!(handle & OBJ_ALLOCATED_TAG))
+ if (!(handle & tag))
return false;
- *phandle = handle & ~OBJ_ALLOCATED_TAG;
+ /* Clear all tags before returning the handle */
+ *phandle = handle & ~OBJ_TAG_MASK;
return true;
}
+static inline bool obj_allocated(struct page *page, void *obj, unsigned long *phandle)
+{
+ return obj_tagged(page, obj, phandle, OBJ_ALLOCATED_TAG);
+}
+
+#ifdef CONFIG_ZPOOL
+static bool obj_stores_deferred_handle(struct page *page, void *obj,
+ unsigned long *phandle)
+{
+ return obj_tagged(page, obj, phandle, OBJ_DEFERRED_HANDLE_TAG);
+}
+#endif
+
static void reset_page(struct page *page)
{
__ClearPageMovable(page);
@@ -946,22 +981,36 @@ unlock:
}
#ifdef CONFIG_ZPOOL
+static unsigned long find_deferred_handle_obj(struct size_class *class,
+ struct page *page, int *obj_idx);
+
/*
* Free all the deferred handles whose objects are freed in zs_free.
*/
-static void free_handles(struct zs_pool *pool, struct zspage *zspage)
+static void free_handles(struct zs_pool *pool, struct size_class *class,
+ struct zspage *zspage)
{
- unsigned long handle = (unsigned long)zspage->deferred_handles;
+ int obj_idx = 0;
+ struct page *page = get_first_page(zspage);
+ unsigned long handle;
- while (handle) {
- unsigned long nxt_handle = handle_to_obj(handle);
+ while (1) {
+ handle = find_deferred_handle_obj(class, page, &obj_idx);
+ if (!handle) {
+ page = get_next_page(page);
+ if (!page)
+ break;
+ obj_idx = 0;
+ continue;
+ }
cache_free_handle(pool, handle);
- handle = nxt_handle;
+ obj_idx++;
}
}
#else
-static inline void free_handles(struct zs_pool *pool, struct zspage *zspage) {}
+static inline void free_handles(struct zs_pool *pool, struct size_class *class,
+ struct zspage *zspage) {}
#endif
static void __free_zspage(struct zs_pool *pool, struct size_class *class,
@@ -979,7 +1028,7 @@ static void __free_zspage(struct zs_pool *pool, struct size_class *class,
VM_BUG_ON(fg != ZS_EMPTY);
/* Free all deferred handles from zs_free */
- free_handles(pool, zspage);
+ free_handles(pool, class, zspage);
next = page = get_first_page(zspage);
do {
@@ -1067,7 +1116,6 @@ static void init_zspage(struct size_class *class, struct zspage *zspage)
#ifdef CONFIG_ZPOOL
INIT_LIST_HEAD(&zspage->lru);
zspage->under_reclaim = false;
- zspage->deferred_handles = NULL;
#endif
set_freeobj(zspage, 0);
@@ -1568,7 +1616,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
}
EXPORT_SYMBOL_GPL(zs_malloc);
-static void obj_free(int class_size, unsigned long obj)
+static void obj_free(int class_size, unsigned long obj, unsigned long *handle)
{
struct link_free *link;
struct zspage *zspage;
@@ -1582,15 +1630,29 @@ static void obj_free(int class_size, unsigned long obj)
zspage = get_zspage(f_page);
vaddr = kmap_atomic(f_page);
-
- /* Insert this object in containing zspage's freelist */
link = (struct link_free *)(vaddr + f_offset);
- if (likely(!ZsHugePage(zspage)))
- link->next = get_freeobj(zspage) << OBJ_TAG_BITS;
- else
- f_page->index = 0;
+
+ if (handle) {
+#ifdef CONFIG_ZPOOL
+ /* Stores the (deferred) handle in the object's header */
+ *handle |= OBJ_DEFERRED_HANDLE_TAG;
+ *handle &= ~OBJ_ALLOCATED_TAG;
+
+ if (likely(!ZsHugePage(zspage)))
+ link->deferred_handle = *handle;
+ else
+ f_page->index = *handle;
+#endif
+ } else {
+ /* Insert this object in containing zspage's freelist */
+ if (likely(!ZsHugePage(zspage)))
+ link->next = get_freeobj(zspage) << OBJ_TAG_BITS;
+ else
+ f_page->index = 0;
+ set_freeobj(zspage, f_objidx);
+ }
+
kunmap_atomic(vaddr);
- set_freeobj(zspage, f_objidx);
mod_zspage_inuse(zspage, -1);
}
@@ -1615,7 +1677,6 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
zspage = get_zspage(f_page);
class = zspage_class(pool, zspage);
- obj_free(class->size, obj);
class_stat_dec(class, OBJ_USED, 1);
#ifdef CONFIG_ZPOOL
@@ -1624,15 +1685,15 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
* Reclaim needs the handles during writeback. It'll free
* them along with the zspage when it's done with them.
*
- * Record current deferred handle at the memory location
- * whose address is given by handle.
+ * Record current deferred handle in the object's header.
*/
- record_obj(handle, (unsigned long)zspage->deferred_handles);
- zspage->deferred_handles = (unsigned long *)handle;
+ obj_free(class->size, obj, &handle);
spin_unlock(&pool->lock);
return;
}
#endif
+ obj_free(class->size, obj, NULL);
+
fullness = fix_fullness_group(class, zspage);
if (fullness == ZS_EMPTY)
free_zspage(pool, class, zspage);
@@ -1713,11 +1774,11 @@ static void zs_object_copy(struct size_class *class, unsigned long dst,
}
/*
- * Find alloced object in zspage from index object and
+ * Find object with a certain tag in zspage from index object and
* return handle.
*/
-static unsigned long find_alloced_obj(struct size_class *class,
- struct page *page, int *obj_idx)
+static unsigned long find_tagged_obj(struct size_class *class,
+ struct page *page, int *obj_idx, int tag)
{
unsigned int offset;
int index = *obj_idx;
@@ -1728,7 +1789,7 @@ static unsigned long find_alloced_obj(struct size_class *class,
offset += class->size * index;
while (offset < PAGE_SIZE) {
- if (obj_allocated(page, addr + offset, &handle))
+ if (obj_tagged(page, addr + offset, &handle, tag))
break;
offset += class->size;
@@ -1742,6 +1803,28 @@ static unsigned long find_alloced_obj(struct size_class *class,
return handle;
}
+/*
+ * Find alloced object in zspage from index object and
+ * return handle.
+ */
+static unsigned long find_alloced_obj(struct size_class *class,
+ struct page *page, int *obj_idx)
+{
+ return find_tagged_obj(class, page, obj_idx, OBJ_ALLOCATED_TAG);
+}
+
+#ifdef CONFIG_ZPOOL
+/*
+ * Find object storing a deferred handle in header in zspage from index object
+ * and return handle.
+ */
+static unsigned long find_deferred_handle_obj(struct size_class *class,
+ struct page *page, int *obj_idx)
+{
+ return find_tagged_obj(class, page, obj_idx, OBJ_DEFERRED_HANDLE_TAG);
+}
+#endif
+
struct zs_compact_control {
/* Source spage for migration which could be a subpage of zspage */
struct page *s_page;
@@ -1784,7 +1867,7 @@ static int migrate_zspage(struct zs_pool *pool, struct size_class *class,
zs_object_copy(class, free_obj, used_obj);
obj_idx++;
record_obj(handle, free_obj);
- obj_free(class->size, used_obj);
+ obj_free(class->size, used_obj, NULL);
}
/* Remember last position in this iteration */
@@ -2478,6 +2561,90 @@ void zs_destroy_pool(struct zs_pool *pool)
EXPORT_SYMBOL_GPL(zs_destroy_pool);
#ifdef CONFIG_ZPOOL
+static void restore_freelist(struct zs_pool *pool, struct size_class *class,
+ struct zspage *zspage)
+{
+ unsigned int obj_idx = 0;
+ unsigned long handle, off = 0; /* off is within-page offset */
+ struct page *page = get_first_page(zspage);
+ struct link_free *prev_free = NULL;
+ void *prev_page_vaddr = NULL;
+
+ /* in case no free object found */
+ set_freeobj(zspage, (unsigned int)(-1UL));
+
+ while (page) {
+ void *vaddr = kmap_atomic(page);
+ struct page *next_page;
+
+ while (off < PAGE_SIZE) {
+ void *obj_addr = vaddr + off;
+
+ /* skip allocated object */
+ if (obj_allocated(page, obj_addr, &handle)) {
+ obj_idx++;
+ off += class->size;
+ continue;
+ }
+
+ /* free deferred handle from reclaim attempt */
+ if (obj_stores_deferred_handle(page, obj_addr, &handle))
+ cache_free_handle(pool, handle);
+
+ if (prev_free)
+ prev_free->next = obj_idx << OBJ_TAG_BITS;
+ else /* first free object found */
+ set_freeobj(zspage, obj_idx);
+
+ prev_free = (struct link_free *)vaddr + off / sizeof(*prev_free);
+ /* if last free object in a previous page, need to unmap */
+ if (prev_page_vaddr) {
+ kunmap_atomic(prev_page_vaddr);
+ prev_page_vaddr = NULL;
+ }
+
+ obj_idx++;
+ off += class->size;
+ }
+
+ /*
+ * Handle the last (full or partial) object on this page.
+ */
+ next_page = get_next_page(page);
+ if (next_page) {
+ if (!prev_free || prev_page_vaddr) {
+ /*
+ * There is no free object in this page, so we can safely
+ * unmap it.
+ */
+ kunmap_atomic(vaddr);
+ } else {
+ /* update prev_page_vaddr since prev_free is on this page */
+ prev_page_vaddr = vaddr;
+ }
+ } else { /* this is the last page */
+ if (prev_free) {
+ /*
+ * Reset OBJ_TAG_BITS bit to last link to tell
+ * whether it's allocated object or not.
+ */
+ prev_free->next = -1UL << OBJ_TAG_BITS;
+ }
+
+ /* unmap previous page (if not done yet) */
+ if (prev_page_vaddr) {
+ kunmap_atomic(prev_page_vaddr);
+ prev_page_vaddr = NULL;
+ }
+
+ kunmap_atomic(vaddr);
+ }
+
+ page = next_page;
+ off %= PAGE_SIZE;
+ }
+}
+
static int zs_reclaim_page(struct zs_pool *pool, unsigned int retries)
{
int i, obj_idx, ret = 0;
@@ -2561,6 +2728,12 @@ next:
return 0;
}
+ /*
+ * Eviction fails on one of the handles, so we need to restore zspage.
+ * We need to rebuild its freelist (and free stored deferred handles),
+ * put it back to the correct size class, and add it to the LRU list.
+ */
+ restore_freelist(pool, class, zspage);
putback_zspage(class, zspage);
list_add(&zspage->lru, &pool->lru);
unlock_zspage(zspage);
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index 9630b1275557..82c7005ede65 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -305,13 +305,12 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv)
kfree(priv);
}
-static int xen_9pfs_front_remove(struct xenbus_device *dev)
+static void xen_9pfs_front_remove(struct xenbus_device *dev)
{
struct xen_9pfs_front_priv *priv = dev_get_drvdata(&dev->dev);
dev_set_drvdata(&dev->dev, NULL);
xen_9pfs_front_free(priv);
- return 0;
}
static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev,
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index d3e542c2fc3e..acf563fbdfd9 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -821,6 +821,7 @@ static void terminate_big_destroy(struct hci_dev *hdev, void *data, int err)
static int hci_le_terminate_big(struct hci_dev *hdev, u8 big, u8 bis)
{
struct iso_list_data *d;
+ int ret;
bt_dev_dbg(hdev, "big 0x%2.2x bis 0x%2.2x", big, bis);
@@ -831,8 +832,12 @@ static int hci_le_terminate_big(struct hci_dev *hdev, u8 big, u8 bis)
d->big = big;
d->bis = bis;
- return hci_cmd_sync_queue(hdev, terminate_big_sync, d,
- terminate_big_destroy);
+ ret = hci_cmd_sync_queue(hdev, terminate_big_sync, d,
+ terminate_big_destroy);
+ if (ret)
+ kfree(d);
+
+ return ret;
}
static int big_terminate_sync(struct hci_dev *hdev, void *data)
@@ -857,6 +862,7 @@ static int big_terminate_sync(struct hci_dev *hdev, void *data)
static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, u16 sync_handle)
{
struct iso_list_data *d;
+ int ret;
bt_dev_dbg(hdev, "big 0x%2.2x sync_handle 0x%4.4x", big, sync_handle);
@@ -867,8 +873,12 @@ static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, u16 sync_handle)
d->big = big;
d->sync_handle = sync_handle;
- return hci_cmd_sync_queue(hdev, big_terminate_sync, d,
- terminate_big_destroy);
+ ret = hci_cmd_sync_queue(hdev, big_terminate_sync, d,
+ terminate_big_destroy);
+ if (ret)
+ kfree(d);
+
+ return ret;
}
/* Cleanup BIS connection
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 0594af4e37ca..ad92a4be5851 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -3848,8 +3848,11 @@ static u8 hci_cc_le_set_cig_params(struct hci_dev *hdev, void *data,
conn->handle, conn->link);
/* Create CIS if LE is already connected */
- if (conn->link && conn->link->state == BT_CONNECTED)
+ if (conn->link && conn->link->state == BT_CONNECTED) {
+ rcu_read_unlock();
hci_le_create_cis(conn->link);
+ rcu_read_lock();
+ }
if (i == rp->num_handles)
break;
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 9e2d7e4b850c..117eedb6f709 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -3572,7 +3572,7 @@ static const struct hci_init_stage hci_init2[] = {
static int hci_le_read_buffer_size_sync(struct hci_dev *hdev)
{
/* Use Read LE Buffer Size V2 if supported */
- if (hdev->commands[41] & 0x20)
+ if (iso_capable(hdev) && hdev->commands[41] & 0x20)
return __hci_cmd_sync_status(hdev,
HCI_OP_LE_READ_BUFFER_SIZE_V2,
0, NULL, HCI_CMD_TIMEOUT);
@@ -3597,10 +3597,10 @@ static int hci_le_read_supported_states_sync(struct hci_dev *hdev)
/* LE Controller init stage 2 command sequence */
static const struct hci_init_stage le_init2[] = {
- /* HCI_OP_LE_READ_BUFFER_SIZE */
- HCI_INIT(hci_le_read_buffer_size_sync),
/* HCI_OP_LE_READ_LOCAL_FEATURES */
HCI_INIT(hci_le_read_local_features_sync),
+ /* HCI_OP_LE_READ_BUFFER_SIZE */
+ HCI_INIT(hci_le_read_buffer_size_sync),
/* HCI_OP_LE_READ_SUPPORTED_STATES */
HCI_INIT(hci_le_read_supported_states_sync),
{}
@@ -6187,20 +6187,13 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy,
static int _update_adv_data_sync(struct hci_dev *hdev, void *data)
{
- u8 instance = *(u8 *)data;
-
- kfree(data);
+ u8 instance = PTR_ERR(data);
return hci_update_adv_data_sync(hdev, instance);
}
int hci_update_adv_data(struct hci_dev *hdev, u8 instance)
{
- u8 *inst_ptr = kmalloc(1, GFP_KERNEL);
-
- if (!inst_ptr)
- return -ENOMEM;
-
- *inst_ptr = instance;
- return hci_cmd_sync_queue(hdev, _update_adv_data_sync, inst_ptr, NULL);
+ return hci_cmd_sync_queue(hdev, _update_adv_data_sync,
+ ERR_PTR(instance), NULL);
}
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index 035bb5d25f85..24444b502e58 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -289,15 +289,15 @@ static int iso_connect_bis(struct sock *sk)
hci_dev_unlock(hdev);
hci_dev_put(hdev);
+ err = iso_chan_add(conn, sk, NULL);
+ if (err)
+ return err;
+
lock_sock(sk);
/* Update source addr of the socket */
bacpy(&iso_pi(sk)->src, &hcon->src);
- err = iso_chan_add(conn, sk, NULL);
- if (err)
- goto release;
-
if (hcon->state == BT_CONNECTED) {
iso_sock_clear_timer(sk);
sk->sk_state = BT_CONNECTED;
@@ -306,7 +306,6 @@ static int iso_connect_bis(struct sock *sk)
iso_sock_set_timer(sk, sk->sk_sndtimeo);
}
-release:
release_sock(sk);
return err;
@@ -372,15 +371,15 @@ static int iso_connect_cis(struct sock *sk)
hci_dev_unlock(hdev);
hci_dev_put(hdev);
+ err = iso_chan_add(conn, sk, NULL);
+ if (err)
+ return err;
+
lock_sock(sk);
/* Update source addr of the socket */
bacpy(&iso_pi(sk)->src, &hcon->src);
- err = iso_chan_add(conn, sk, NULL);
- if (err)
- goto release;
-
if (hcon->state == BT_CONNECTED) {
iso_sock_clear_timer(sk);
sk->sk_state = BT_CONNECTED;
@@ -392,7 +391,6 @@ static int iso_connect_cis(struct sock *sk)
iso_sock_set_timer(sk, sk->sk_sndtimeo);
}
-release:
release_sock(sk);
return err;
@@ -895,13 +893,10 @@ static int iso_listen_bis(struct sock *sk)
if (!hdev)
return -EHOSTUNREACH;
- hci_dev_lock(hdev);
-
err = hci_pa_create_sync(hdev, &iso_pi(sk)->dst,
le_addr_type(iso_pi(sk)->dst_type),
iso_pi(sk)->bc_sid);
- hci_dev_unlock(hdev);
hci_dev_put(hdev);
return err;
@@ -1432,33 +1427,29 @@ static void iso_conn_ready(struct iso_conn *conn)
struct sock *parent;
struct sock *sk = conn->sk;
struct hci_ev_le_big_sync_estabilished *ev;
+ struct hci_conn *hcon;
BT_DBG("conn %p", conn);
if (sk) {
iso_sock_ready(conn->sk);
} else {
- iso_conn_lock(conn);
-
- if (!conn->hcon) {
- iso_conn_unlock(conn);
+ hcon = conn->hcon;
+ if (!hcon)
return;
- }
- ev = hci_recv_event_data(conn->hcon->hdev,
+ ev = hci_recv_event_data(hcon->hdev,
HCI_EVT_LE_BIG_SYNC_ESTABILISHED);
if (ev)
- parent = iso_get_sock_listen(&conn->hcon->src,
- &conn->hcon->dst,
+ parent = iso_get_sock_listen(&hcon->src,
+ &hcon->dst,
iso_match_big, ev);
else
- parent = iso_get_sock_listen(&conn->hcon->src,
+ parent = iso_get_sock_listen(&hcon->src,
BDADDR_ANY, NULL, NULL);
- if (!parent) {
- iso_conn_unlock(conn);
+ if (!parent)
return;
- }
lock_sock(parent);
@@ -1466,30 +1457,29 @@ static void iso_conn_ready(struct iso_conn *conn)
BTPROTO_ISO, GFP_ATOMIC, 0);
if (!sk) {
release_sock(parent);
- iso_conn_unlock(conn);
return;
}
iso_sock_init(sk, parent);
- bacpy(&iso_pi(sk)->src, &conn->hcon->src);
- iso_pi(sk)->src_type = conn->hcon->src_type;
+ bacpy(&iso_pi(sk)->src, &hcon->src);
+ iso_pi(sk)->src_type = hcon->src_type;
/* If hcon has no destination address (BDADDR_ANY) it means it
* was created by HCI_EV_LE_BIG_SYNC_ESTABILISHED so we need to
* initialize using the parent socket destination address.
*/
- if (!bacmp(&conn->hcon->dst, BDADDR_ANY)) {
- bacpy(&conn->hcon->dst, &iso_pi(parent)->dst);
- conn->hcon->dst_type = iso_pi(parent)->dst_type;
- conn->hcon->sync_handle = iso_pi(parent)->sync_handle;
+ if (!bacmp(&hcon->dst, BDADDR_ANY)) {
+ bacpy(&hcon->dst, &iso_pi(parent)->dst);
+ hcon->dst_type = iso_pi(parent)->dst_type;
+ hcon->sync_handle = iso_pi(parent)->sync_handle;
}
- bacpy(&iso_pi(sk)->dst, &conn->hcon->dst);
- iso_pi(sk)->dst_type = conn->hcon->dst_type;
+ bacpy(&iso_pi(sk)->dst, &hcon->dst);
+ iso_pi(sk)->dst_type = hcon->dst_type;
- hci_conn_hold(conn->hcon);
- __iso_chan_add(conn, sk, parent);
+ hci_conn_hold(hcon);
+ iso_chan_add(conn, sk, parent);
if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags))
sk->sk_state = BT_CONNECT2;
@@ -1500,8 +1490,6 @@ static void iso_conn_ready(struct iso_conn *conn)
parent->sk_data_ready(parent);
release_sock(parent);
-
- iso_conn_unlock(conn);
}
}
diff --git a/net/bluetooth/mgmt_util.h b/net/bluetooth/mgmt_util.h
index 6a8b7e84293d..bdf978605d5a 100644
--- a/net/bluetooth/mgmt_util.h
+++ b/net/bluetooth/mgmt_util.h
@@ -27,7 +27,7 @@ struct mgmt_mesh_tx {
struct sock *sk;
u8 handle;
u8 instance;
- u8 param[sizeof(struct mgmt_cp_mesh_send) + 29];
+ u8 param[sizeof(struct mgmt_cp_mesh_send) + 31];
};
struct mgmt_pending_cmd {
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 21e24da4847f..4397e14ff560 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -391,6 +391,7 @@ static int rfcomm_sock_connect(struct socket *sock, struct sockaddr *addr, int a
addr->sa_family != AF_BLUETOOTH)
return -EINVAL;
+ sock_hold(sk);
lock_sock(sk);
if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) {
@@ -410,14 +411,18 @@ static int rfcomm_sock_connect(struct socket *sock, struct sockaddr *addr, int a
d->sec_level = rfcomm_pi(sk)->sec_level;
d->role_switch = rfcomm_pi(sk)->role_switch;
+ /* Drop sock lock to avoid potential deadlock with the RFCOMM lock */
+ release_sock(sk);
err = rfcomm_dlc_open(d, &rfcomm_pi(sk)->src, &sa->rc_bdaddr,
sa->rc_channel);
- if (!err)
+ lock_sock(sk);
+ if (!err && !sock_flag(sk, SOCK_ZAPPED))
err = bt_sock_wait_state(sk, BT_CONNECTED,
sock_sndtimeo(sk, flags & O_NONBLOCK));
done:
release_sock(sk);
+ sock_put(sk);
return err;
}
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index f20f4373ff40..9554abcfd5b4 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -871,6 +871,7 @@ static unsigned int ip_sabotage_in(void *priv,
if (nf_bridge && !nf_bridge->in_prerouting &&
!netif_is_l3_master(skb->dev) &&
!netif_is_l3_slave(skb->dev)) {
+ nf_bridge_info_free(skb);
state->okfn(state->net, state->sk, skb);
return NF_STOLEN;
}
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 748be7253248..78c9729a6057 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -1015,6 +1015,7 @@ static void caif_sock_destructor(struct sock *sk)
return;
}
sk_stream_kill_queues(&cf_sk->sk);
+ WARN_ON_ONCE(sk->sk_forward_alloc);
caif_free_client(&cf_sk->layer);
}
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index cc405d8c7c30..8480684f2762 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -269,11 +269,15 @@ int cfctrl_linkup_request(struct cflayer *layer,
default:
pr_warn("Request setup of bad link type = %d\n",
param->linktype);
+ cfpkt_destroy(pkt);
return -EINVAL;
}
req = kzalloc(sizeof(*req), GFP_KERNEL);
- if (!req)
+ if (!req) {
+ cfpkt_destroy(pkt);
return -ENOMEM;
+ }
+
req->client_layer = user_layer;
req->cmd = CFCTRL_CMD_LINK_SETUP;
req->param = *param;
diff --git a/net/can/isotp.c b/net/can/isotp.c
index 608f8c24ae46..fc81d77724a1 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -140,7 +140,7 @@ struct isotp_sock {
canid_t rxid;
ktime_t tx_gap;
ktime_t lastrxcf_tstamp;
- struct hrtimer rxtimer, txtimer;
+ struct hrtimer rxtimer, txtimer, txfrtimer;
struct can_isotp_options opt;
struct can_isotp_fc_options rxfc, txfc;
struct can_isotp_ll_options ll;
@@ -871,7 +871,7 @@ static void isotp_rcv_echo(struct sk_buff *skb, void *data)
}
/* start timer to send next consecutive frame with correct delay */
- hrtimer_start(&so->txtimer, so->tx_gap, HRTIMER_MODE_REL_SOFT);
+ hrtimer_start(&so->txfrtimer, so->tx_gap, HRTIMER_MODE_REL_SOFT);
}
static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer)
@@ -879,49 +879,39 @@ static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer)
struct isotp_sock *so = container_of(hrtimer, struct isotp_sock,
txtimer);
struct sock *sk = &so->sk;
- enum hrtimer_restart restart = HRTIMER_NORESTART;
- switch (so->tx.state) {
- case ISOTP_SENDING:
-
- /* cfecho should be consumed by isotp_rcv_echo() here */
- if (!so->cfecho) {
- /* start timeout for unlikely lost echo skb */
- hrtimer_set_expires(&so->txtimer,
- ktime_add(ktime_get(),
- ktime_set(ISOTP_ECHO_TIMEOUT, 0)));
- restart = HRTIMER_RESTART;
+ /* don't handle timeouts in IDLE state */
+ if (so->tx.state == ISOTP_IDLE)
+ return HRTIMER_NORESTART;
- /* push out the next consecutive frame */
- isotp_send_cframe(so);
- break;
- }
+ /* we did not get any flow control or echo frame in time */
- /* cfecho has not been cleared in isotp_rcv_echo() */
- pr_notice_once("can-isotp: cfecho %08X timeout\n", so->cfecho);
- fallthrough;
+ /* report 'communication error on send' */
+ sk->sk_err = ECOMM;
+ if (!sock_flag(sk, SOCK_DEAD))
+ sk_error_report(sk);
- case ISOTP_WAIT_FC:
- case ISOTP_WAIT_FIRST_FC:
+ /* reset tx state */
+ so->tx.state = ISOTP_IDLE;
+ wake_up_interruptible(&so->wait);
- /* we did not get any flow control frame in time */
+ return HRTIMER_NORESTART;
+}
- /* report 'communication error on send' */
- sk->sk_err = ECOMM;
- if (!sock_flag(sk, SOCK_DEAD))
- sk_error_report(sk);
+static enum hrtimer_restart isotp_txfr_timer_handler(struct hrtimer *hrtimer)
+{
+ struct isotp_sock *so = container_of(hrtimer, struct isotp_sock,
+ txfrtimer);
- /* reset tx state */
- so->tx.state = ISOTP_IDLE;
- wake_up_interruptible(&so->wait);
- break;
+ /* start echo timeout handling and cover below protocol error */
+ hrtimer_start(&so->txtimer, ktime_set(ISOTP_ECHO_TIMEOUT, 0),
+ HRTIMER_MODE_REL_SOFT);
- default:
- WARN_ONCE(1, "can-isotp: tx timer state %08X cfecho %08X\n",
- so->tx.state, so->cfecho);
- }
+ /* cfecho should be consumed by isotp_rcv_echo() here */
+ if (so->tx.state == ISOTP_SENDING && !so->cfecho)
+ isotp_send_cframe(so);
- return restart;
+ return HRTIMER_NORESTART;
}
static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
@@ -1162,6 +1152,10 @@ static int isotp_release(struct socket *sock)
/* wait for complete transmission of current pdu */
wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
+ /* force state machines to be idle also when a signal occurred */
+ so->tx.state = ISOTP_IDLE;
+ so->rx.state = ISOTP_IDLE;
+
spin_lock(&isotp_notifier_lock);
while (isotp_busy_notifier == so) {
spin_unlock(&isotp_notifier_lock);
@@ -1194,6 +1188,7 @@ static int isotp_release(struct socket *sock)
}
}
+ hrtimer_cancel(&so->txfrtimer);
hrtimer_cancel(&so->txtimer);
hrtimer_cancel(&so->rxtimer);
@@ -1597,6 +1592,8 @@ static int isotp_init(struct sock *sk)
so->rxtimer.function = isotp_rx_timer_handler;
hrtimer_init(&so->txtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT);
so->txtimer.function = isotp_tx_timer_handler;
+ hrtimer_init(&so->txfrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT);
+ so->txfrtimer.function = isotp_txfr_timer_handler;
init_waitqueue_head(&so->wait);
spin_lock_init(&so->rx_lock);
diff --git a/net/can/j1939/address-claim.c b/net/can/j1939/address-claim.c
index f33c47327927..ca4ad6cdd5cb 100644
--- a/net/can/j1939/address-claim.c
+++ b/net/can/j1939/address-claim.c
@@ -165,6 +165,46 @@ static void j1939_ac_process(struct j1939_priv *priv, struct sk_buff *skb)
* leaving this function.
*/
ecu = j1939_ecu_get_by_name_locked(priv, name);
+
+ if (ecu && ecu->addr == skcb->addr.sa) {
+ /* The ISO 11783-5 standard, in "4.5.2 - Address claim
+ * requirements", states:
+ * d) No CF shall begin, or resume, transmission on the
+ * network until 250 ms after it has successfully claimed
+ * an address except when responding to a request for
+ * address-claimed.
+ *
+ * But "Figure 6" and "Figure 7" in "4.5.4.2 - Address-claim
+ * prioritization" show that the CF begins the transmission
+ * after 250 ms from the first AC (address-claimed) message
+ * even if it sends another AC message during that time window
+ * to resolve the address contention with another CF.
+ *
+ * As stated in "4.4.2.3 - Address-claimed message":
+ * In order to successfully claim an address, the CF sending
+ * an address claimed message shall not receive a contending
+ * claim from another CF for at least 250 ms.
+ *
+ * As stated in "4.4.3.2 - NAME management (NM) message":
+ * 1) A commanding CF can
+ * d) request that a CF with a specified NAME transmit
+ * the address-claimed message with its current NAME.
+ * 2) A target CF shall
+ * d) send an address-claimed message in response to a
+ * request for a matching NAME
+ *
+ * Taking the above arguments into account, the 250 ms wait is
+ * requested only during network initialization.
+ *
+ * Do not restart the timer on AC message if both the NAME and
+ * the address match and so if the address has already been
+ * claimed (timer has expired) or the AC message has been sent
+ * to resolve the contention with another CF (timer is still
+ * running).
+ */
+ goto out_ecu_put;
+ }
+
if (!ecu && j1939_address_is_unicast(skcb->addr.sa))
ecu = j1939_ecu_create_locked(priv, name);
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index 5c722b55fe23..fce9b9ebf13f 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -1092,10 +1092,6 @@ static bool j1939_session_deactivate(struct j1939_session *session)
bool active;
j1939_session_list_lock(priv);
- /* This function should be called with a session ref-count of at
- * least 2.
- */
- WARN_ON_ONCE(kref_read(&session->kref) < 2);
active = j1939_session_deactivate_locked(session);
j1939_session_list_unlock(priv);
diff --git a/net/can/raw.c b/net/can/raw.c
index 81071cdb0301..ba86782ba8bb 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -132,8 +132,8 @@ static void raw_rcv(struct sk_buff *oskb, void *data)
return;
/* make sure to not pass oversized frames to the socket */
- if ((can_is_canfd_skb(oskb) && !ro->fd_frames && !ro->xl_frames) ||
- (can_is_canxl_skb(oskb) && !ro->xl_frames))
+ if ((!ro->fd_frames && can_is_canfd_skb(oskb)) ||
+ (!ro->xl_frames && can_is_canxl_skb(oskb)))
return;
/* eliminate multiple filter matches for the same skb */
@@ -670,6 +670,11 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
if (copy_from_sockptr(&ro->fd_frames, optval, optlen))
return -EFAULT;
+ /* Enabling CAN XL includes CAN FD */
+ if (ro->xl_frames && !ro->fd_frames) {
+ ro->fd_frames = ro->xl_frames;
+ return -EINVAL;
+ }
break;
case CAN_RAW_XL_FRAMES:
@@ -679,6 +684,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
if (copy_from_sockptr(&ro->xl_frames, optval, optlen))
return -EFAULT;
+ /* Enabling CAN XL includes CAN FD */
+ if (ro->xl_frames)
+ ro->fd_frames = ro->xl_frames;
break;
case CAN_RAW_JOIN_FILTERS:
@@ -786,6 +794,25 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
return 0;
}
+static bool raw_bad_txframe(struct raw_sock *ro, struct sk_buff *skb, int mtu)
+{
+ /* Classical CAN -> no checks for flags and device capabilities */
+ if (can_is_can_skb(skb))
+ return false;
+
+ /* CAN FD -> needs to be enabled and a CAN FD or CAN XL device */
+ if (ro->fd_frames && can_is_canfd_skb(skb) &&
+ (mtu == CANFD_MTU || can_is_canxl_dev_mtu(mtu)))
+ return false;
+
+ /* CAN XL -> needs to be enabled and a CAN XL device */
+ if (ro->xl_frames && can_is_canxl_skb(skb) &&
+ can_is_canxl_dev_mtu(mtu))
+ return false;
+
+ return true;
+}
+
static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
{
struct sock *sk = sock->sk;
@@ -833,20 +860,8 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
goto free_skb;
err = -EINVAL;
- if (ro->xl_frames && can_is_canxl_dev_mtu(dev->mtu)) {
- /* CAN XL, CAN FD and Classical CAN */
- if (!can_is_canxl_skb(skb) && !can_is_canfd_skb(skb) &&
- !can_is_can_skb(skb))
- goto free_skb;
- } else if (ro->fd_frames && dev->mtu == CANFD_MTU) {
- /* CAN FD and Classical CAN */
- if (!can_is_canfd_skb(skb) && !can_is_can_skb(skb))
- goto free_skb;
- } else {
- /* Classical CAN */
- if (!can_is_can_skb(skb))
- goto free_skb;
- }
+ if (raw_bad_txframe(ro, skb, dev->mtu))
+ goto free_skb;
sockcm_init(&sockc, sk);
if (msg->msg_controllen) {
diff --git a/net/core/dev.c b/net/core/dev.c
index b76fb37b381e..f23e287602b7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1869,14 +1869,6 @@ static void __move_netdevice_notifier_net(struct net *src_net,
__register_netdevice_notifier_net(dst_net, nb, true);
}
-void move_netdevice_notifier_net(struct net *src_net, struct net *dst_net,
- struct notifier_block *nb)
-{
- rtnl_lock();
- __move_netdevice_notifier_net(src_net, dst_net, nb);
- rtnl_unlock();
-}
-
int register_netdevice_notifier_dev_net(struct net_device *dev,
struct notifier_block *nb,
struct netdev_net_notifier *nn)
@@ -10375,7 +10367,7 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
BUILD_BUG_ON(n > sizeof(*stats64) / sizeof(u64));
for (i = 0; i < n; i++)
- dst[i] = atomic_long_read(&src[i]);
+ dst[i] = (unsigned long)atomic_long_read(&src[i]);
/* zero out counters that only exist in rtnl_link_stats64 */
memset((char *)stats64 + n * sizeof(u64), 0,
sizeof(*stats64) - n * sizeof(u64));
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 032d6d0a5ce6..0bfc144df8b9 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4742,11 +4742,8 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net,
if (err)
return err;
- if (dest_net && !net_eq(dest_net, curr_net)) {
- move_netdevice_notifier_net(curr_net, dest_net,
- &devlink->netdevice_nb);
+ if (dest_net && !net_eq(dest_net, curr_net))
write_pnet(&devlink->_net, dest_net);
- }
err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack);
devlink_reload_failed_set(devlink, !!err);
@@ -9979,7 +9976,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
goto err_xa_alloc;
devlink->netdevice_nb.notifier_call = devlink_netdevice_event;
- ret = register_netdevice_notifier_net(net, &devlink->netdevice_nb);
+ ret = register_netdevice_notifier(&devlink->netdevice_nb);
if (ret)
goto err_register_netdevice_notifier;
@@ -10171,8 +10168,7 @@ void devlink_free(struct devlink *devlink)
xa_destroy(&devlink->snapshot_ids);
xa_destroy(&devlink->ports);
- WARN_ON_ONCE(unregister_netdevice_notifier_net(devlink_net(devlink),
- &devlink->netdevice_nb));
+ WARN_ON_ONCE(unregister_netdevice_notifier(&devlink->netdevice_nb));
xa_erase(&devlinks, devlink->index);
@@ -10503,6 +10499,8 @@ static int devlink_netdevice_event(struct notifier_block *nb,
break;
case NETDEV_REGISTER:
case NETDEV_CHANGENAME:
+ if (devlink_net(devlink) != dev_net(netdev))
+ return NOTIFY_OK;
/* Set the netdev on top of previously set type. Note this
* event happens also during net namespace change so here
* we take into account netdev pointer appearing in this
@@ -10512,6 +10510,8 @@ static int devlink_netdevice_event(struct notifier_block *nb,
netdev);
break;
case NETDEV_UNREGISTER:
+ if (devlink_net(devlink) != dev_net(netdev))
+ return NOTIFY_OK;
/* Clear netdev pointer, but not the type. This event happens
* also during net namespace change so we need to clear
* pointer to netdev that is going to another net namespace.
diff --git a/net/core/filter.c b/net/core/filter.c
index 929358677183..43cc1fe58a2c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3180,15 +3180,18 @@ static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len)
{
+ void *old_data;
+
/* skb_ensure_writable() is not needed here, as we're
* already working on an uncloned skb.
*/
if (unlikely(!pskb_may_pull(skb, off + len)))
return -ENOMEM;
- skb_postpull_rcsum(skb, skb->data + off, len);
- memmove(skb->data + len, skb->data, off);
+ old_data = skb->data;
__skb_pull(skb, len);
+ skb_postpull_rcsum(skb, old_data + off, len);
+ memmove(skb->data, old_data, off);
return 0;
}
diff --git a/net/core/gro.c b/net/core/gro.c
index fd8c6a7e8d3e..4bac7ea6e025 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -162,6 +162,15 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
struct sk_buff *lp;
int segs;
+ /* Do not splice page pool based packets w/ non-page pool
+ * packets. This can result in reference count issues as page
+ * pool pages will not decrement the reference count and will
+ * instead be immediately returned to the pool or have frag
+ * count decremented.
+ */
+ if (p->pp_recycle != skb->pp_recycle)
+ return -ETOOMANYREFS;
+
/* pairs with WRITE_ONCE() in netif_set_gro_max_size() */
gro_max_size = READ_ONCE(p->dev->gro_max_size);
@@ -505,8 +514,9 @@ found_ptype:
NAPI_GRO_CB(skb)->count = 1;
if (unlikely(skb_is_gso(skb))) {
NAPI_GRO_CB(skb)->count = skb_shinfo(skb)->gso_segs;
- /* Only support TCP at the moment. */
- if (!skb_is_gso_tcp(skb))
+ /* Only support TCP and non DODGY users. */
+ if (!skb_is_gso_tcp(skb) ||
+ (skb_shinfo(skb)->gso_type & SKB_GSO_DODGY))
NAPI_GRO_CB(skb)->flush = 1;
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index f00a79fc301b..4edd2176e238 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -269,7 +269,7 @@ static int neigh_forced_gc(struct neigh_table *tbl)
(n->nud_state == NUD_NOARP) ||
(tbl->is_multicast &&
tbl->is_multicast(n->primary_key)) ||
- time_after(tref, n->updated))
+ !time_in_range(n->updated, tref, jiffies))
remove = true;
write_unlock(&n->lock);
@@ -289,7 +289,17 @@ static int neigh_forced_gc(struct neigh_table *tbl)
static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
+ /* Use safe distance from the jiffies - LONG_MAX point while timer
+ * is running in DELAY/PROBE state but still show to user space
+ * large times in the past.
+ */
+ unsigned long mint = jiffies - (LONG_MAX - 86400 * HZ);
+
neigh_hold(n);
+ if (!time_in_range(n->confirmed, mint, jiffies))
+ n->confirmed = mint;
+ if (time_before(n->used, n->confirmed))
+ n->used = n->confirmed;
if (unlikely(mod_timer(&n->timer, when))) {
printk("NEIGH: BUG, double timer add, state is %x\n",
n->nud_state);
@@ -1001,12 +1011,14 @@ static void neigh_periodic_work(struct work_struct *work)
goto next_elt;
}
- if (time_before(n->used, n->confirmed))
+ if (time_before(n->used, n->confirmed) &&
+ time_is_before_eq_jiffies(n->confirmed))
n->used = n->confirmed;
if (refcount_read(&n->refcnt) == 1 &&
(state == NUD_FAILED ||
- time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
+ !time_in_range_open(jiffies, n->used,
+ n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
*np = n->next;
neigh_mark_dead(n);
write_unlock(&n->lock);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 5581d22cc191..7b69cf882b8e 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -137,12 +137,12 @@ static int ops_init(const struct pernet_operations *ops, struct net *net)
return 0;
if (ops->id && ops->size) {
-cleanup:
ng = rcu_dereference_protected(net->gen,
lockdep_is_held(&pernet_ops_rwsem));
ng->ptr[*ops->id] = NULL;
}
+cleanup:
kfree(data);
out:
@@ -304,6 +304,12 @@ struct net *get_net_ns_by_id(const struct net *net, int id)
}
EXPORT_SYMBOL_GPL(get_net_ns_by_id);
+/* init code that must occur even if setup_net() is not called. */
+static __net_init void preinit_net(struct net *net)
+{
+ ref_tracker_dir_init(&net->notrefcnt_tracker, 128);
+}
+
/*
* setup_net runs the initializers for the network namespace object.
*/
@@ -316,7 +322,6 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
refcount_set(&net->ns.count, 1);
ref_tracker_dir_init(&net->refcnt_tracker, 128);
- ref_tracker_dir_init(&net->notrefcnt_tracker, 128);
refcount_set(&net->passive, 1);
get_random_bytes(&net->hash_mix, sizeof(u32));
@@ -472,6 +477,8 @@ struct net *copy_net_ns(unsigned long flags,
rv = -ENOMEM;
goto dec_ucounts;
}
+
+ preinit_net(net);
refcount_set(&net->passive, 1);
net->ucounts = ucounts;
get_user_ns(user_ns);
@@ -1118,6 +1125,7 @@ void __init net_ns_init(void)
init_net.key_domain = &init_net_key_domain;
#endif
down_write(&pernet_ops_rwsem);
+ preinit_net(&init_net);
if (setup_net(&init_net, &init_user_ns))
panic("Could not setup the initial network namespace");
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4a0eb5593275..a31ff4d83ecc 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4100,7 +4100,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
skb_shinfo(skb)->frag_list = NULL;
- do {
+ while (list_skb) {
nskb = list_skb;
list_skb = list_skb->next;
@@ -4146,8 +4146,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
if (skb_needs_linearize(nskb, features) &&
__skb_linearize(nskb))
goto err_linearize;
-
- } while (list_skb);
+ }
skb->truesize = skb->truesize - delta_truesize;
skb->data_len = skb->data_len - delta_len;
diff --git a/net/core/sock.c b/net/core/sock.c
index f954d5893e79..6f27c24016fe 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1531,6 +1531,8 @@ set_sndbuf:
ret = -EINVAL;
break;
}
+ if ((u8)val == SOCK_TXREHASH_DEFAULT)
+ val = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
/* Paired with READ_ONCE() in tcp_rtx_synack() */
WRITE_ONCE(sk->sk_txrehash, (u8)val);
break;
@@ -3451,7 +3453,6 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_pacing_rate = ~0UL;
WRITE_ONCE(sk->sk_pacing_shift, 10);
sk->sk_incoming_cpu = -1;
- sk->sk_txrehash = SOCK_TXREHASH_DEFAULT;
sk_rx_queue_clear(sk);
/*
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 22fa2c5bc6ec..a68a7290a3b2 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1569,15 +1569,16 @@ void sock_map_unhash(struct sock *sk)
psock = sk_psock(sk);
if (unlikely(!psock)) {
rcu_read_unlock();
- if (sk->sk_prot->unhash)
- sk->sk_prot->unhash(sk);
- return;
+ saved_unhash = READ_ONCE(sk->sk_prot)->unhash;
+ } else {
+ saved_unhash = psock->saved_unhash;
+ sock_map_remove_links(sk, psock);
+ rcu_read_unlock();
}
-
- saved_unhash = psock->saved_unhash;
- sock_map_remove_links(sk, psock);
- rcu_read_unlock();
- saved_unhash(sk);
+ if (WARN_ON_ONCE(saved_unhash == sock_map_unhash))
+ return;
+ if (saved_unhash)
+ saved_unhash(sk);
}
EXPORT_SYMBOL_GPL(sock_map_unhash);
@@ -1590,17 +1591,18 @@ void sock_map_destroy(struct sock *sk)
psock = sk_psock_get(sk);
if (unlikely(!psock)) {
rcu_read_unlock();
- if (sk->sk_prot->destroy)
- sk->sk_prot->destroy(sk);
- return;
+ saved_destroy = READ_ONCE(sk->sk_prot)->destroy;
+ } else {
+ saved_destroy = psock->saved_destroy;
+ sock_map_remove_links(sk, psock);
+ rcu_read_unlock();
+ sk_psock_stop(psock);
+ sk_psock_put(sk, psock);
}
-
- saved_destroy = psock->saved_destroy;
- sock_map_remove_links(sk, psock);
- rcu_read_unlock();
- sk_psock_stop(psock);
- sk_psock_put(sk, psock);
- saved_destroy(sk);
+ if (WARN_ON_ONCE(saved_destroy == sock_map_destroy))
+ return;
+ if (saved_destroy)
+ saved_destroy(sk);
}
EXPORT_SYMBOL_GPL(sock_map_destroy);
@@ -1615,16 +1617,21 @@ void sock_map_close(struct sock *sk, long timeout)
if (unlikely(!psock)) {
rcu_read_unlock();
release_sock(sk);
- return sk->sk_prot->close(sk, timeout);
+ saved_close = READ_ONCE(sk->sk_prot)->close;
+ } else {
+ saved_close = psock->saved_close;
+ sock_map_remove_links(sk, psock);
+ rcu_read_unlock();
+ sk_psock_stop(psock);
+ release_sock(sk);
+ cancel_work_sync(&psock->work);
+ sk_psock_put(sk, psock);
}
-
- saved_close = psock->saved_close;
- sock_map_remove_links(sk, psock);
- rcu_read_unlock();
- sk_psock_stop(psock);
- release_sock(sk);
- cancel_work_sync(&psock->work);
- sk_psock_put(sk, psock);
+ /* Make sure we do not recurse. This is a bug.
+ * Leak the socket instead of crashing on a stack overflow.
+ */
+ if (WARN_ON_ONCE(saved_close == sock_map_close))
+ return;
saved_close(sk, timeout);
}
EXPORT_SYMBOL_GPL(sock_map_close);
diff --git a/net/core/stream.c b/net/core/stream.c
index cd06750dd329..434446ab14c5 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -209,7 +209,6 @@ void sk_stream_kill_queues(struct sock *sk)
sk_mem_reclaim_final(sk);
WARN_ON_ONCE(sk->sk_wmem_queued);
- WARN_ON_ONCE(sk->sk_forward_alloc);
/* It is _impossible_ for the backlog to contain anything
* when we get here. All user references to this socket
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 4260fe466993..b9d7c3dd1cb3 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -551,11 +551,9 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL);
/* Clone pktoptions received with SYN, if we own the req */
if (*own_req && ireq->pktopts) {
- newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC);
+ newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
consume_skb(ireq->pktopts);
ireq->pktopts = NULL;
- if (newnp->pktoptions)
- skb_set_owner_r(newnp->pktoptions, newsk);
}
return newsk;
@@ -615,7 +613,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
--ANK (980728)
*/
if (np->rxopt.all)
- opt_skb = skb_clone(skb, GFP_ATOMIC);
+ opt_skb = skb_clone_and_charge_r(skb, sk);
if (sk->sk_state == DCCP_OPEN) { /* Fast path */
if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len))
@@ -679,7 +677,6 @@ ipv6_pktoptions:
np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
if (ipv6_opt_accepted(sk, opt_skb,
&DCCP_SKB_CB(opt_skb)->header.h6)) {
- skb_set_owner_r(opt_skb, sk);
memmove(IP6CB(opt_skb),
&DCCP_SKB_CB(opt_skb)->header.h6,
sizeof(struct inet6_skb_parm));
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index c2f1a542e6fa..646b3e490c71 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -2078,58 +2078,91 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
return ret;
}
-static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr)
+static int ethtool_vzalloc_stats_array(int n_stats, u64 **data)
{
+ if (n_stats < 0)
+ return n_stats;
+ if (n_stats > S32_MAX / sizeof(u64))
+ return -ENOMEM;
+ if (WARN_ON_ONCE(!n_stats))
+ return -EOPNOTSUPP;
+
+ *data = vzalloc(array_size(n_stats, sizeof(u64)));
+ if (!*data)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int ethtool_get_phy_stats_phydev(struct phy_device *phydev,
+ struct ethtool_stats *stats,
+ u64 **data)
+ {
const struct ethtool_phy_ops *phy_ops = ethtool_phy_ops;
+ int n_stats, ret;
+
+ if (!phy_ops || !phy_ops->get_sset_count || !phy_ops->get_stats)
+ return -EOPNOTSUPP;
+
+ n_stats = phy_ops->get_sset_count(phydev);
+
+ ret = ethtool_vzalloc_stats_array(n_stats, data);
+ if (ret)
+ return ret;
+
+ stats->n_stats = n_stats;
+ return phy_ops->get_stats(phydev, stats, *data);
+}
+
+static int ethtool_get_phy_stats_ethtool(struct net_device *dev,
+ struct ethtool_stats *stats,
+ u64 **data)
+{
const struct ethtool_ops *ops = dev->ethtool_ops;
- struct phy_device *phydev = dev->phydev;
- struct ethtool_stats stats;
- u64 *data;
- int ret, n_stats;
+ int n_stats, ret;
- if (!phydev && (!ops->get_ethtool_phy_stats || !ops->get_sset_count))
+ if (!ops || !ops->get_sset_count || ops->get_ethtool_phy_stats)
return -EOPNOTSUPP;
- if (phydev && !ops->get_ethtool_phy_stats &&
- phy_ops && phy_ops->get_sset_count)
- n_stats = phy_ops->get_sset_count(phydev);
- else
- n_stats = ops->get_sset_count(dev, ETH_SS_PHY_STATS);
- if (n_stats < 0)
- return n_stats;
- if (n_stats > S32_MAX / sizeof(u64))
- return -ENOMEM;
- WARN_ON_ONCE(!n_stats);
+ n_stats = ops->get_sset_count(dev, ETH_SS_PHY_STATS);
+
+ ret = ethtool_vzalloc_stats_array(n_stats, data);
+ if (ret)
+ return ret;
+
+ stats->n_stats = n_stats;
+ ops->get_ethtool_phy_stats(dev, stats, *data);
+
+ return 0;
+}
+
+static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr)
+{
+ struct phy_device *phydev = dev->phydev;
+ struct ethtool_stats stats;
+ u64 *data = NULL;
+ int ret = -EOPNOTSUPP;
if (copy_from_user(&stats, useraddr, sizeof(stats)))
return -EFAULT;
- stats.n_stats = n_stats;
+ if (phydev)
+ ret = ethtool_get_phy_stats_phydev(phydev, &stats, &data);
- if (n_stats) {
- data = vzalloc(array_size(n_stats, sizeof(u64)));
- if (!data)
- return -ENOMEM;
+ if (ret == -EOPNOTSUPP)
+ ret = ethtool_get_phy_stats_ethtool(dev, &stats, &data);
- if (phydev && !ops->get_ethtool_phy_stats &&
- phy_ops && phy_ops->get_stats) {
- ret = phy_ops->get_stats(phydev, &stats, data);
- if (ret < 0)
- goto out;
- } else {
- ops->get_ethtool_phy_stats(dev, &stats, data);
- }
- } else {
- data = NULL;
- }
+ if (ret)
+ goto out;
- ret = -EFAULT;
- if (copy_to_user(useraddr, &stats, sizeof(stats)))
+ if (copy_to_user(useraddr, &stats, sizeof(stats))) {
+ ret = -EFAULT;
goto out;
+ }
+
useraddr += sizeof(stats);
- if (n_stats && copy_to_user(useraddr, data, array_size(n_stats, sizeof(u64))))
- goto out;
- ret = 0;
+ if (copy_to_user(useraddr, data, array_size(stats.n_stats, sizeof(u64))))
+ ret = -EFAULT;
out:
vfree(data);
diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c
index ebe6145aed3f..be260ab34e58 100644
--- a/net/ethtool/rss.c
+++ b/net/ethtool/rss.c
@@ -122,10 +122,13 @@ rss_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base,
{
const struct rss_reply_data *data = RSS_REPDATA(reply_base);
- if (nla_put_u32(skb, ETHTOOL_A_RSS_HFUNC, data->hfunc) ||
- nla_put(skb, ETHTOOL_A_RSS_INDIR,
- sizeof(u32) * data->indir_size, data->indir_table) ||
- nla_put(skb, ETHTOOL_A_RSS_HKEY, data->hkey_size, data->hkey))
+ if ((data->hfunc &&
+ nla_put_u32(skb, ETHTOOL_A_RSS_HFUNC, data->hfunc)) ||
+ (data->indir_size &&
+ nla_put(skb, ETHTOOL_A_RSS_INDIR,
+ sizeof(u32) * data->indir_size, data->indir_table)) ||
+ (data->hkey_size &&
+ nla_put(skb, ETHTOOL_A_RSS_HKEY, data->hkey_size, data->hkey)))
return -EMSGSIZE;
return 0;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index ab4a06be489b..cf11f10927e1 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -347,6 +347,7 @@ lookup_protocol:
sk->sk_destruct = inet_sock_destruct;
sk->sk_protocol = protocol;
sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
+ sk->sk_txrehash = READ_ONCE(net->core.sysctl_txrehash);
inet->uc_ttl = -1;
inet->mc_loop = 1;
@@ -1665,6 +1666,7 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family,
if (rc == 0) {
*sk = sock->sk;
(*sk)->sk_allocation = GFP_ATOMIC;
+ (*sk)->sk_use_task_frag = false;
/*
* Unhash it so that IP input processing does not even see it,
* we do not wish this socket to see incoming packets.
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index ce9ff3c62e84..3bb890a40ed7 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -30,6 +30,7 @@
#include <linux/slab.h>
#include <linux/netlink.h>
#include <linux/hash.h>
+#include <linux/nospec.h>
#include <net/arp.h>
#include <net/inet_dscp.h>
@@ -1022,6 +1023,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
if (type > RTAX_MAX)
return false;
+ type = array_index_nospec(type, RTAX_MAX + 1);
if (type == RTAX_CC_ALGO) {
char tmp[TCP_CA_NAME_MAX];
bool ecn_ca = false;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index b366ab9148f2..f2c43f67187d 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -173,22 +173,40 @@ static bool inet_bind_conflict(const struct sock *sk, struct sock *sk2,
return false;
}
+static bool __inet_bhash2_conflict(const struct sock *sk, struct sock *sk2,
+ kuid_t sk_uid, bool relax,
+ bool reuseport_cb_ok, bool reuseport_ok)
+{
+ if (sk->sk_family == AF_INET && ipv6_only_sock(sk2))
+ return false;
+
+ return inet_bind_conflict(sk, sk2, sk_uid, relax,
+ reuseport_cb_ok, reuseport_ok);
+}
+
static bool inet_bhash2_conflict(const struct sock *sk,
const struct inet_bind2_bucket *tb2,
kuid_t sk_uid,
bool relax, bool reuseport_cb_ok,
bool reuseport_ok)
{
+ struct inet_timewait_sock *tw2;
struct sock *sk2;
sk_for_each_bound_bhash2(sk2, &tb2->owners) {
- if (sk->sk_family == AF_INET && ipv6_only_sock(sk2))
- continue;
+ if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax,
+ reuseport_cb_ok, reuseport_ok))
+ return true;
+ }
+
+ twsk_for_each_bound_bhash2(tw2, &tb2->deathrow) {
+ sk2 = (struct sock *)tw2;
- if (inet_bind_conflict(sk, sk2, sk_uid, relax,
- reuseport_cb_ok, reuseport_ok))
+ if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax,
+ reuseport_cb_ok, reuseport_ok))
return true;
}
+
return false;
}
@@ -1182,20 +1200,31 @@ void inet_csk_prepare_forced_close(struct sock *sk)
}
EXPORT_SYMBOL(inet_csk_prepare_forced_close);
+static int inet_ulp_can_listen(const struct sock *sk)
+{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+
+ if (icsk->icsk_ulp_ops && !icsk->icsk_ulp_ops->clone)
+ return -EINVAL;
+
+ return 0;
+}
+
int inet_csk_listen_start(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_sock *inet = inet_sk(sk);
int err;
+ err = inet_ulp_can_listen(sk);
+ if (unlikely(err))
+ return err;
+
reqsk_queue_alloc(&icsk->icsk_accept_queue);
sk->sk_ack_backlog = 0;
inet_csk_delack_init(sk);
- if (sk->sk_txrehash == SOCK_TXREHASH_DEFAULT)
- sk->sk_txrehash = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
-
/* There is race window here: we announce ourselves listening,
* but this transition is still not validated by get_port().
* It is OK, because this socket enters to hash table only
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index d039b4e732a3..f58d73888638 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -116,6 +116,7 @@ static void inet_bind2_bucket_init(struct inet_bind2_bucket *tb,
#endif
tb->rcv_saddr = sk->sk_rcv_saddr;
INIT_HLIST_HEAD(&tb->owners);
+ INIT_HLIST_HEAD(&tb->deathrow);
hlist_add_head(&tb->node, &head->chain);
}
@@ -137,7 +138,7 @@ struct inet_bind2_bucket *inet_bind2_bucket_create(struct kmem_cache *cachep,
/* Caller must hold hashbucket lock for this tb with local BH disabled */
void inet_bind2_bucket_destroy(struct kmem_cache *cachep, struct inet_bind2_bucket *tb)
{
- if (hlist_empty(&tb->owners)) {
+ if (hlist_empty(&tb->owners) && hlist_empty(&tb->deathrow)) {
__hlist_del(&tb->node);
kmem_cache_free(cachep, tb);
}
@@ -649,8 +650,20 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
spin_lock(lock);
if (osk) {
WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
- ret = sk_nulls_del_node_init_rcu(osk);
- } else if (found_dup_sk) {
+ ret = sk_hashed(osk);
+ if (ret) {
+ /* Before deleting the node, we insert a new one to make
+ * sure that the look-up-sk process would not miss either
+ * of them and that at least one node would exist in ehash
+ * table all the time. Otherwise there's a tiny chance
+ * that lookup process could find nothing in ehash table.
+ */
+ __sk_nulls_add_node_tail_rcu(sk, list);
+ sk_nulls_del_node_init_rcu(osk);
+ }
+ goto unlock;
+ }
+ if (found_dup_sk) {
*found_dup_sk = inet_ehash_lookup_by_sk(sk, list);
if (*found_dup_sk)
ret = false;
@@ -659,6 +672,7 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
if (ret)
__sk_nulls_add_node_rcu(sk, list);
+unlock:
spin_unlock(lock);
return ret;
@@ -1103,15 +1117,16 @@ ok:
/* Head lock still held and bh's disabled */
inet_bind_hash(sk, tb, tb2, port);
- spin_unlock(&head2->lock);
-
if (sk_unhashed(sk)) {
inet_sk(sk)->inet_sport = htons(port);
inet_ehash_nolisten(sk, (struct sock *)tw, NULL);
}
if (tw)
inet_twsk_bind_unhash(tw, hinfo);
+
+ spin_unlock(&head2->lock);
spin_unlock(&head->lock);
+
if (tw)
inet_twsk_deschedule_put(tw);
local_bh_enable();
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 66fc940f9521..beed32fff484 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -29,6 +29,7 @@
void inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
struct inet_hashinfo *hashinfo)
{
+ struct inet_bind2_bucket *tb2 = tw->tw_tb2;
struct inet_bind_bucket *tb = tw->tw_tb;
if (!tb)
@@ -37,6 +38,11 @@ void inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
__hlist_del(&tw->tw_bind_node);
tw->tw_tb = NULL;
inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
+
+ __hlist_del(&tw->tw_bind2_node);
+ tw->tw_tb2 = NULL;
+ inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2);
+
__sock_put((struct sock *)tw);
}
@@ -45,7 +51,7 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw)
{
struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo;
spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
- struct inet_bind_hashbucket *bhead;
+ struct inet_bind_hashbucket *bhead, *bhead2;
spin_lock(lock);
sk_nulls_del_node_init_rcu((struct sock *)tw);
@@ -54,9 +60,13 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw)
/* Disassociate with bind bucket. */
bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
hashinfo->bhash_size)];
+ bhead2 = inet_bhashfn_portaddr(hashinfo, (struct sock *)tw,
+ twsk_net(tw), tw->tw_num);
spin_lock(&bhead->lock);
+ spin_lock(&bhead2->lock);
inet_twsk_bind_unhash(tw, hashinfo);
+ spin_unlock(&bhead2->lock);
spin_unlock(&bhead->lock);
refcount_dec(&tw->tw_dr->tw_refcount);
@@ -81,10 +91,10 @@ void inet_twsk_put(struct inet_timewait_sock *tw)
}
EXPORT_SYMBOL_GPL(inet_twsk_put);
-static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw,
- struct hlist_nulls_head *list)
+static void inet_twsk_add_node_tail_rcu(struct inet_timewait_sock *tw,
+ struct hlist_nulls_head *list)
{
- hlist_nulls_add_head_rcu(&tw->tw_node, list);
+ hlist_nulls_add_tail_rcu(&tw->tw_node, list);
}
static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
@@ -93,6 +103,12 @@ static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
hlist_add_head(&tw->tw_bind_node, list);
}
+static void inet_twsk_add_bind2_node(struct inet_timewait_sock *tw,
+ struct hlist_head *list)
+{
+ hlist_add_head(&tw->tw_bind2_node, list);
+}
+
/*
* Enter the time wait state. This is called with locally disabled BH.
* Essentially we whip up a timewait bucket, copy the relevant info into it
@@ -105,22 +121,33 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
const struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash);
spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
- struct inet_bind_hashbucket *bhead;
+ struct inet_bind_hashbucket *bhead, *bhead2;
+
/* Step 1: Put TW into bind hash. Original socket stays there too.
Note, that any socket with inet->num != 0 MUST be bound in
binding cache, even if it is closed.
*/
bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
hashinfo->bhash_size)];
+ bhead2 = inet_bhashfn_portaddr(hashinfo, sk, twsk_net(tw), inet->inet_num);
+
spin_lock(&bhead->lock);
+ spin_lock(&bhead2->lock);
+
tw->tw_tb = icsk->icsk_bind_hash;
WARN_ON(!icsk->icsk_bind_hash);
inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
+
+ tw->tw_tb2 = icsk->icsk_bind2_hash;
+ WARN_ON(!icsk->icsk_bind2_hash);
+ inet_twsk_add_bind2_node(tw, &tw->tw_tb2->deathrow);
+
+ spin_unlock(&bhead2->lock);
spin_unlock(&bhead->lock);
spin_lock(lock);
- inet_twsk_add_node_rcu(tw, &ehead->chain);
+ inet_twsk_add_node_tail_rcu(tw, &ehead->chain);
/* Step 3: Remove SK from hash chain */
if (__sk_nulls_del_node_init_rcu(sk))
diff --git a/net/ipv4/metrics.c b/net/ipv4/metrics.c
index 7fcfdfd8f9de..0e3ee1532848 100644
--- a/net/ipv4/metrics.c
+++ b/net/ipv4/metrics.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/netlink.h>
+#include <linux/nospec.h>
#include <linux/rtnetlink.h>
#include <linux/types.h>
#include <net/ip.h>
@@ -25,6 +26,7 @@ static int ip_metrics_convert(struct net *net, struct nlattr *fc_mx,
return -EINVAL;
}
+ type = array_index_nospec(type, RTAX_MAX + 1);
if (type == RTAX_CC_ALGO) {
char tmp[TCP_CA_NAME_MAX];
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c567d5e8053e..33f559f491c8 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -435,6 +435,7 @@ void tcp_init_sock(struct sock *sk)
/* There's a bubble in the pipe until at least the first ACK. */
tp->app_limited = ~0U;
+ tp->rate_app_limited = 1;
/* See draft-stevens-tcpca-spec-01 for discussion of the
* initialization of these values.
@@ -3178,6 +3179,7 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->plb_rehash = 0;
/* There's a bubble in the pipe until at least the first ACK. */
tp->app_limited = ~0U;
+ tp->rate_app_limited = 1;
tp->rack.mstamp = 0;
tp->rack.advanced = 0;
tp->rack.reo_wnd_steps = 1;
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 94aad3870c5f..cf26d65ca389 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -6,6 +6,7 @@
#include <linux/bpf.h>
#include <linux/init.h>
#include <linux/wait.h>
+#include <linux/util_macros.h>
#include <net/inet_common.h>
#include <net/tls.h>
@@ -639,10 +640,9 @@ EXPORT_SYMBOL_GPL(tcp_bpf_update_proto);
*/
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
{
- int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
struct proto *prot = newsk->sk_prot;
- if (prot == &tcp_bpf_prots[family][TCP_BPF_BASE])
+ if (is_insidevar(prot, tcp_bpf_prots))
newsk->sk_prot = sk->sk_prot_creator;
}
#endif /* CONFIG_BPF_SYSCALL */
diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c
index 9ae50b1bd844..2aa442128630 100644
--- a/net/ipv4/tcp_ulp.c
+++ b/net/ipv4/tcp_ulp.c
@@ -139,6 +139,10 @@ static int __tcp_set_ulp(struct sock *sk, const struct tcp_ulp_ops *ulp_ops)
if (sk->sk_socket)
clear_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags);
+ err = -ENOTCONN;
+ if (!ulp_ops->clone && sk->sk_state == TCP_LISTEN)
+ goto out_err;
+
err = ulp_ops->init(sk);
if (err)
goto out_err;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f7a84a4acffc..faa47f9ea73a 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3127,17 +3127,17 @@ static void add_v4_addrs(struct inet6_dev *idev)
offset = sizeof(struct in6_addr) - 4;
memcpy(&addr.s6_addr32[3], idev->dev->dev_addr + offset, 4);
- if (idev->dev->flags&IFF_POINTOPOINT) {
+ if (!(idev->dev->flags & IFF_POINTOPOINT) && idev->dev->type == ARPHRD_SIT) {
+ scope = IPV6_ADDR_COMPATv4;
+ plen = 96;
+ pflags |= RTF_NONEXTHOP;
+ } else {
if (idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_NONE)
return;
addr.s6_addr32[0] = htonl(0xfe800000);
scope = IFA_LINK;
plen = 64;
- } else {
- scope = IPV6_ADDR_COMPATv4;
- plen = 96;
- pflags |= RTF_NONEXTHOP;
}
if (addr.s6_addr32[3]) {
@@ -3447,6 +3447,30 @@ static void addrconf_gre_config(struct net_device *dev)
}
#endif
+static void addrconf_init_auto_addrs(struct net_device *dev)
+{
+ switch (dev->type) {
+#if IS_ENABLED(CONFIG_IPV6_SIT)
+ case ARPHRD_SIT:
+ addrconf_sit_config(dev);
+ break;
+#endif
+#if IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE)
+ case ARPHRD_IP6GRE:
+ case ARPHRD_IPGRE:
+ addrconf_gre_config(dev);
+ break;
+#endif
+ case ARPHRD_LOOPBACK:
+ init_loopback(dev);
+ break;
+
+ default:
+ addrconf_dev_config(dev);
+ break;
+ }
+}
+
static int fixup_permanent_addr(struct net *net,
struct inet6_dev *idev,
struct inet6_ifaddr *ifp)
@@ -3615,26 +3639,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
run_pending = 1;
}
- switch (dev->type) {
-#if IS_ENABLED(CONFIG_IPV6_SIT)
- case ARPHRD_SIT:
- addrconf_sit_config(dev);
- break;
-#endif
-#if IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE)
- case ARPHRD_IP6GRE:
- case ARPHRD_IPGRE:
- addrconf_gre_config(dev);
- break;
-#endif
- case ARPHRD_LOOPBACK:
- init_loopback(dev);
- break;
-
- default:
- addrconf_dev_config(dev);
- break;
- }
+ addrconf_init_auto_addrs(dev);
if (!IS_ERR_OR_NULL(idev)) {
if (run_pending)
@@ -6397,7 +6402,7 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
if (idev->cnf.addr_gen_mode != new_val) {
idev->cnf.addr_gen_mode = new_val;
- addrconf_dev_config(idev->dev);
+ addrconf_init_auto_addrs(idev->dev);
}
} else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) {
struct net_device *dev;
@@ -6408,7 +6413,7 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
if (idev &&
idev->cnf.addr_gen_mode != new_val) {
idev->cnf.addr_gen_mode = new_val;
- addrconf_dev_config(idev->dev);
+ addrconf_init_auto_addrs(idev->dev);
}
}
}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index fee9163382c2..847934763868 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -222,6 +222,7 @@ lookup_protocol:
np->pmtudisc = IPV6_PMTUDISC_WANT;
np->repflow = net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ESTABLISHED;
sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
+ sk->sk_txrehash = READ_ONCE(net->core.sysctl_txrehash);
/* Init the ipv4 part of the socket since we can have sockets
* using v6 API for ipv4.
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index e624497fa992..9b6818453afe 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -51,7 +51,7 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6, struct sock *sk)
fl6->flowi6_mark = sk->sk_mark;
fl6->fl6_dport = inet->inet_dport;
fl6->fl6_sport = inet->inet_sport;
- fl6->flowlabel = np->flow_label;
+ fl6->flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
fl6->flowi6_uid = sk->sk_uid;
if (!oif)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 60fd91bb5171..c314fdde0097 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -547,7 +547,20 @@ int ip6_forward(struct sk_buff *skb)
pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
int proxied = ip6_forward_proxy_check(skb);
if (proxied > 0) {
- hdr->hop_limit--;
+ /* It's tempting to decrease the hop limit
+ * here by 1, as we do at the end of the
+ * function too.
+ *
+ * But that would be incorrect, as proxying is
+ * not forwarding. The ip6_input function
+ * will handle this packet locally, and it
+ * depends on the hop limit being unchanged.
+ *
+ * One example is the NDP hop limit, that
+ * always has to stay 255, but other would be
+ * similar checks around RA packets, where the
+ * user can even change the desired limit.
+ */
return ip6_input(skb);
} else if (proxied < 0) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index a06a9f847db5..ada087b50541 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -505,6 +505,7 @@ csum_copy_err:
static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
struct raw6_sock *rp)
{
+ struct ipv6_txoptions *opt;
struct sk_buff *skb;
int err = 0;
int offset;
@@ -522,6 +523,9 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
offset = rp->offset;
total_len = inet_sk(sk)->cork.base.length;
+ opt = inet6_sk(sk)->cork.opt;
+ total_len -= opt ? opt->opt_flen : 0;
+
if (offset >= total_len - 1) {
err = -EINVAL;
ip6_flush_pending_frames(sk);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 11b736a76bd7..a52a4f12f146 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -272,6 +272,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
fl6.flowi6_proto = IPPROTO_TCP;
fl6.daddr = sk->sk_v6_daddr;
fl6.saddr = saddr ? *saddr : np->saddr;
+ fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
fl6.flowi6_oif = sk->sk_bound_dev_if;
fl6.flowi6_mark = sk->sk_mark;
fl6.fl6_dport = usin->sin6_port;
@@ -1387,14 +1388,11 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
/* Clone pktoptions received with SYN, if we own the req */
if (ireq->pktopts) {
- newnp->pktoptions = skb_clone(ireq->pktopts,
- sk_gfp_mask(sk, GFP_ATOMIC));
+ newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
consume_skb(ireq->pktopts);
ireq->pktopts = NULL;
- if (newnp->pktoptions) {
+ if (newnp->pktoptions)
tcp_v6_restore_cb(newnp->pktoptions);
- skb_set_owner_r(newnp->pktoptions, newsk);
- }
}
} else {
if (!req_unhash && found_dup_sk) {
@@ -1466,7 +1464,7 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
--ANK (980728)
*/
if (np->rxopt.all)
- opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
+ opt_skb = skb_clone_and_charge_r(skb, sk);
reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
@@ -1552,7 +1550,6 @@ ipv6_pktoptions:
if (np->repflow)
np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
- skb_set_owner_r(opt_skb, sk);
tcp_v6_restore_cb(opt_skb);
opt_skb = xchg(&np->pktoptions, opt_skb);
} else {
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 2bdbcec781cd..a815f5ab4c49 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1261,7 +1261,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
const struct sadb_x_nat_t_type* n_type;
struct xfrm_encap_tmpl *natt;
- x->encap = kmalloc(sizeof(*x->encap), GFP_KERNEL);
+ x->encap = kzalloc(sizeof(*x->encap), GFP_KERNEL);
if (!x->encap) {
err = -ENOMEM;
goto out;
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 9a1415fe3fa7..03608d3ded4b 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -104,9 +104,9 @@ static struct workqueue_struct *l2tp_wq;
/* per-net private data for this module */
static unsigned int l2tp_net_id;
struct l2tp_net {
- struct list_head l2tp_tunnel_list;
- /* Lock for write access to l2tp_tunnel_list */
- spinlock_t l2tp_tunnel_list_lock;
+ /* Lock for write access to l2tp_tunnel_idr */
+ spinlock_t l2tp_tunnel_idr_lock;
+ struct idr l2tp_tunnel_idr;
struct hlist_head l2tp_session_hlist[L2TP_HASH_SIZE_2];
/* Lock for write access to l2tp_session_hlist */
spinlock_t l2tp_session_hlist_lock;
@@ -208,13 +208,10 @@ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id)
struct l2tp_tunnel *tunnel;
rcu_read_lock_bh();
- list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
- if (tunnel->tunnel_id == tunnel_id &&
- refcount_inc_not_zero(&tunnel->ref_count)) {
- rcu_read_unlock_bh();
-
- return tunnel;
- }
+ tunnel = idr_find(&pn->l2tp_tunnel_idr, tunnel_id);
+ if (tunnel && refcount_inc_not_zero(&tunnel->ref_count)) {
+ rcu_read_unlock_bh();
+ return tunnel;
}
rcu_read_unlock_bh();
@@ -224,13 +221,14 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_get);
struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth)
{
- const struct l2tp_net *pn = l2tp_pernet(net);
+ struct l2tp_net *pn = l2tp_pernet(net);
+ unsigned long tunnel_id, tmp;
struct l2tp_tunnel *tunnel;
int count = 0;
rcu_read_lock_bh();
- list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
- if (++count > nth &&
+ idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) {
+ if (tunnel && ++count > nth &&
refcount_inc_not_zero(&tunnel->ref_count)) {
rcu_read_unlock_bh();
return tunnel;
@@ -1043,7 +1041,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, uns
IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED);
nf_reset_ct(skb);
- bh_lock_sock(sk);
+ bh_lock_sock_nested(sk);
if (sock_owned_by_user(sk)) {
kfree_skb(skb);
ret = NET_XMIT_DROP;
@@ -1227,6 +1225,15 @@ static void l2tp_udp_encap_destroy(struct sock *sk)
l2tp_tunnel_delete(tunnel);
}
+static void l2tp_tunnel_remove(struct net *net, struct l2tp_tunnel *tunnel)
+{
+ struct l2tp_net *pn = l2tp_pernet(net);
+
+ spin_lock_bh(&pn->l2tp_tunnel_idr_lock);
+ idr_remove(&pn->l2tp_tunnel_idr, tunnel->tunnel_id);
+ spin_unlock_bh(&pn->l2tp_tunnel_idr_lock);
+}
+
/* Workqueue tunnel deletion function */
static void l2tp_tunnel_del_work(struct work_struct *work)
{
@@ -1234,7 +1241,6 @@ static void l2tp_tunnel_del_work(struct work_struct *work)
del_work);
struct sock *sk = tunnel->sock;
struct socket *sock = sk->sk_socket;
- struct l2tp_net *pn;
l2tp_tunnel_closeall(tunnel);
@@ -1248,12 +1254,7 @@ static void l2tp_tunnel_del_work(struct work_struct *work)
}
}
- /* Remove the tunnel struct from the tunnel list */
- pn = l2tp_pernet(tunnel->l2tp_net);
- spin_lock_bh(&pn->l2tp_tunnel_list_lock);
- list_del_rcu(&tunnel->list);
- spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
-
+ l2tp_tunnel_remove(tunnel->l2tp_net, tunnel);
/* drop initial ref */
l2tp_tunnel_dec_refcount(tunnel);
@@ -1384,8 +1385,6 @@ out:
return err;
}
-static struct lock_class_key l2tp_socket_class;
-
int l2tp_tunnel_create(int fd, int version, u32 tunnel_id, u32 peer_tunnel_id,
struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp)
{
@@ -1455,12 +1454,19 @@ static int l2tp_validate_socket(const struct sock *sk, const struct net *net,
int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
struct l2tp_tunnel_cfg *cfg)
{
- struct l2tp_tunnel *tunnel_walk;
- struct l2tp_net *pn;
+ struct l2tp_net *pn = l2tp_pernet(net);
+ u32 tunnel_id = tunnel->tunnel_id;
struct socket *sock;
struct sock *sk;
int ret;
+ spin_lock_bh(&pn->l2tp_tunnel_idr_lock);
+ ret = idr_alloc_u32(&pn->l2tp_tunnel_idr, NULL, &tunnel_id, tunnel_id,
+ GFP_ATOMIC);
+ spin_unlock_bh(&pn->l2tp_tunnel_idr_lock);
+ if (ret)
+ return ret == -ENOSPC ? -EEXIST : ret;
+
if (tunnel->fd < 0) {
ret = l2tp_tunnel_sock_create(net, tunnel->tunnel_id,
tunnel->peer_tunnel_id, cfg,
@@ -1474,6 +1480,7 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
}
sk = sock->sk;
+ lock_sock(sk);
write_lock_bh(&sk->sk_callback_lock);
ret = l2tp_validate_socket(sk, net, tunnel->encap);
if (ret < 0)
@@ -1481,24 +1488,6 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
rcu_assign_sk_user_data(sk, tunnel);
write_unlock_bh(&sk->sk_callback_lock);
- tunnel->l2tp_net = net;
- pn = l2tp_pernet(net);
-
- sock_hold(sk);
- tunnel->sock = sk;
-
- spin_lock_bh(&pn->l2tp_tunnel_list_lock);
- list_for_each_entry(tunnel_walk, &pn->l2tp_tunnel_list, list) {
- if (tunnel_walk->tunnel_id == tunnel->tunnel_id) {
- spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
- sock_put(sk);
- ret = -EEXIST;
- goto err_sock;
- }
- }
- list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list);
- spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
-
if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
struct udp_tunnel_sock_cfg udp_cfg = {
.sk_user_data = tunnel,
@@ -1512,9 +1501,16 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
tunnel->old_sk_destruct = sk->sk_destruct;
sk->sk_destruct = &l2tp_tunnel_destruct;
- lockdep_set_class_and_name(&sk->sk_lock.slock, &l2tp_socket_class,
- "l2tp_sock");
sk->sk_allocation = GFP_ATOMIC;
+ release_sock(sk);
+
+ sock_hold(sk);
+ tunnel->sock = sk;
+ tunnel->l2tp_net = net;
+
+ spin_lock_bh(&pn->l2tp_tunnel_idr_lock);
+ idr_replace(&pn->l2tp_tunnel_idr, tunnel, tunnel->tunnel_id);
+ spin_unlock_bh(&pn->l2tp_tunnel_idr_lock);
trace_register_tunnel(tunnel);
@@ -1523,17 +1519,16 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
return 0;
-err_sock:
- write_lock_bh(&sk->sk_callback_lock);
- rcu_assign_sk_user_data(sk, NULL);
err_inval_sock:
write_unlock_bh(&sk->sk_callback_lock);
+ release_sock(sk);
if (tunnel->fd < 0)
sock_release(sock);
else
sockfd_put(sock);
err:
+ l2tp_tunnel_remove(net, tunnel);
return ret;
}
EXPORT_SYMBOL_GPL(l2tp_tunnel_register);
@@ -1647,8 +1642,8 @@ static __net_init int l2tp_init_net(struct net *net)
struct l2tp_net *pn = net_generic(net, l2tp_net_id);
int hash;
- INIT_LIST_HEAD(&pn->l2tp_tunnel_list);
- spin_lock_init(&pn->l2tp_tunnel_list_lock);
+ idr_init(&pn->l2tp_tunnel_idr);
+ spin_lock_init(&pn->l2tp_tunnel_idr_lock);
for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++)
INIT_HLIST_HEAD(&pn->l2tp_session_hlist[hash]);
@@ -1662,11 +1657,13 @@ static __net_exit void l2tp_exit_net(struct net *net)
{
struct l2tp_net *pn = l2tp_pernet(net);
struct l2tp_tunnel *tunnel = NULL;
+ unsigned long tunnel_id, tmp;
int hash;
rcu_read_lock_bh();
- list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
- l2tp_tunnel_delete(tunnel);
+ idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) {
+ if (tunnel)
+ l2tp_tunnel_delete(tunnel);
}
rcu_read_unlock_bh();
@@ -1676,6 +1673,7 @@ static __net_exit void l2tp_exit_net(struct net *net)
for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++)
WARN_ON_ONCE(!hlist_empty(&pn->l2tp_session_hlist[hash]));
+ idr_destroy(&pn->l2tp_tunnel_idr);
}
static struct pernet_operations l2tp_net_ops = {
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 9c40f8d3bce8..f9514bacbd4a 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -491,7 +491,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
{
struct tid_ampdu_tx *tid_tx;
struct ieee80211_local *local = sta->local;
- struct ieee80211_sub_if_data *sdata = sta->sdata;
+ struct ieee80211_sub_if_data *sdata;
struct ieee80211_ampdu_params params = {
.sta = &sta->sta,
.action = IEEE80211_AMPDU_TX_START,
@@ -511,8 +511,6 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
*/
clear_bit(HT_AGG_STATE_WANT_START, &tid_tx->state);
- ieee80211_agg_stop_txq(sta, tid);
-
/*
* Make sure no packets are being processed. This ensures that
* we have a valid starting sequence number and that in-flight
@@ -521,6 +519,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
*/
synchronize_net();
+ sdata = sta->sdata;
params.ssn = sta->tid_seq[tid] >> 4;
ret = drv_ampdu_action(local, sdata, &params);
tid_tx->ssn = params.ssn;
@@ -534,6 +533,9 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
*/
set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state);
} else if (ret) {
+ if (!sdata)
+ return;
+
ht_dbg(sdata,
"BA request denied - HW unavailable for %pM tid %d\n",
sta->sta.addr, tid);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 8f9a2ab502b3..672eff6f5d32 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -147,6 +147,7 @@ static int ieee80211_set_ap_mbssid_options(struct ieee80211_sub_if_data *sdata,
link_conf->bssid_index = 0;
link_conf->nontransmitted = false;
link_conf->ema_ap = false;
+ link_conf->bssid_indicator = 0;
if (sdata->vif.type != NL80211_IFTYPE_AP || !params.tx_wdev)
return -EINVAL;
@@ -1511,6 +1512,12 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev,
kfree(link_conf->ftmr_params);
link_conf->ftmr_params = NULL;
+ sdata->vif.mbssid_tx_vif = NULL;
+ link_conf->bssid_index = 0;
+ link_conf->nontransmitted = false;
+ link_conf->ema_ap = false;
+ link_conf->bssid_indicator = 0;
+
__sta_info_flush(sdata, true);
ieee80211_free_keys(sdata, true);
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 7a3d7893e19d..f1914bf39f0e 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -167,7 +167,7 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf,
continue;
txqi = to_txq_info(sta->sta.txq[i]);
p += scnprintf(p, bufsz + buf - p,
- "%d %d %u %u %u %u %u %u %u %u %u 0x%lx(%s%s%s)\n",
+ "%d %d %u %u %u %u %u %u %u %u %u 0x%lx(%s%s%s%s)\n",
txqi->txq.tid,
txqi->txq.ac,
txqi->tin.backlog_bytes,
@@ -182,7 +182,8 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf,
txqi->flags,
test_bit(IEEE80211_TXQ_STOP, &txqi->flags) ? "STOP" : "RUN",
test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags) ? " AMPDU" : "",
- test_bit(IEEE80211_TXQ_NO_AMSDU, &txqi->flags) ? " NO-AMSDU" : "");
+ test_bit(IEEE80211_TXQ_NO_AMSDU, &txqi->flags) ? " NO-AMSDU" : "",
+ test_bit(IEEE80211_TXQ_DIRTY, &txqi->flags) ? " DIRTY" : "");
}
rcu_read_unlock();
diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c
index d737db4e07e2..cfb09e4aed4d 100644
--- a/net/mac80211/driver-ops.c
+++ b/net/mac80211/driver-ops.c
@@ -392,6 +392,9 @@ int drv_ampdu_action(struct ieee80211_local *local,
might_sleep();
+ if (!sdata)
+ return -EIO;
+
sdata = get_bss_sdata(sdata);
if (!check_sdata_in_driver(sdata))
return -EIO;
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 809bad53e15b..5d13a3dfd366 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1199,7 +1199,7 @@ static inline void drv_wake_tx_queue(struct ieee80211_local *local,
/* In reconfig don't transmit now, but mark for waking later */
if (local->in_reconfig) {
- set_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txq->flags);
+ set_bit(IEEE80211_TXQ_DIRTY, &txq->flags);
return;
}
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 83bc41346ae7..5315ab750280 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -391,6 +391,37 @@ void ieee80211_ba_session_work(struct work_struct *work)
tid_tx = sta->ampdu_mlme.tid_start_tx[tid];
if (!blocked && tid_tx) {
+ struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]);
+ struct ieee80211_sub_if_data *sdata =
+ vif_to_sdata(txqi->txq.vif);
+ struct fq *fq = &sdata->local->fq;
+
+ spin_lock_bh(&fq->lock);
+
+ /* Allow only frags to be dequeued */
+ set_bit(IEEE80211_TXQ_STOP, &txqi->flags);
+
+ if (!skb_queue_empty(&txqi->frags)) {
+ /* Fragmented Tx is ongoing, wait for it to
+ * finish. Reschedule worker to retry later.
+ */
+
+ spin_unlock_bh(&fq->lock);
+ spin_unlock_bh(&sta->lock);
+
+ /* Give the task working on the txq a chance
+ * to send out the queued frags
+ */
+ synchronize_net();
+
+ mutex_unlock(&sta->ampdu_mlme.mtx);
+
+ ieee80211_queue_work(&sdata->local->hw, work);
+ return;
+ }
+
+ spin_unlock_bh(&fq->lock);
+
/*
* Assign it over to the normal tid_tx array
* where it "goes live".
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 63ff0d2524b6..d16606e84e22 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -838,7 +838,7 @@ enum txq_info_flags {
IEEE80211_TXQ_STOP,
IEEE80211_TXQ_AMPDU,
IEEE80211_TXQ_NO_AMSDU,
- IEEE80211_TXQ_STOP_NETIF_TX,
+ IEEE80211_TXQ_DIRTY,
};
/**
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index d49a5906a943..23ed13f15067 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -364,7 +364,9 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata,
/* No support for VLAN with MLO yet */
if (iftype == NL80211_IFTYPE_AP_VLAN &&
- nsdata->wdev.use_4addr)
+ sdata->wdev.use_4addr &&
+ nsdata->vif.type == NL80211_IFTYPE_AP &&
+ nsdata->vif.valid_links)
return -EOPNOTSUPP;
/*
@@ -2195,7 +2197,6 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
ret = cfg80211_register_netdevice(ndev);
if (ret) {
- ieee80211_if_free(ndev);
free_netdev(ndev);
return ret;
}
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 7e3ab6e1b28f..c6562a6d2503 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -4049,6 +4049,58 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_rx_data *rx)
#undef CALL_RXH
}
+static bool
+ieee80211_rx_is_valid_sta_link_id(struct ieee80211_sta *sta, u8 link_id)
+{
+ if (!sta->mlo)
+ return false;
+
+ return !!(sta->valid_links & BIT(link_id));
+}
+
+static bool ieee80211_rx_data_set_link(struct ieee80211_rx_data *rx,
+ u8 link_id)
+{
+ rx->link_id = link_id;
+ rx->link = rcu_dereference(rx->sdata->link[link_id]);
+
+ if (!rx->sta)
+ return rx->link;
+
+ if (!ieee80211_rx_is_valid_sta_link_id(&rx->sta->sta, link_id))
+ return false;
+
+ rx->link_sta = rcu_dereference(rx->sta->link[link_id]);
+
+ return rx->link && rx->link_sta;
+}
+
+static bool ieee80211_rx_data_set_sta(struct ieee80211_rx_data *rx,
+ struct ieee80211_sta *pubsta,
+ int link_id)
+{
+ struct sta_info *sta;
+
+ sta = container_of(pubsta, struct sta_info, sta);
+
+ rx->link_id = link_id;
+ rx->sta = sta;
+
+ if (sta) {
+ rx->local = sta->sdata->local;
+ if (!rx->sdata)
+ rx->sdata = sta->sdata;
+ rx->link_sta = &sta->deflink;
+ }
+
+ if (link_id < 0)
+ rx->link = &rx->sdata->deflink;
+ else if (!ieee80211_rx_data_set_link(rx, link_id))
+ return false;
+
+ return true;
+}
+
/*
* This function makes calls into the RX path, therefore
* it has to be invoked under RCU read lock.
@@ -4057,16 +4109,19 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid)
{
struct sk_buff_head frames;
struct ieee80211_rx_data rx = {
- .sta = sta,
- .sdata = sta->sdata,
- .local = sta->local,
/* This is OK -- must be QoS data frame */
.security_idx = tid,
.seqno_idx = tid,
- .link_id = -1,
};
struct tid_ampdu_rx *tid_agg_rx;
- u8 link_id;
+ int link_id = -1;
+
+ /* FIXME: statistics won't be right with this */
+ if (sta->sta.valid_links)
+ link_id = ffs(sta->sta.valid_links) - 1;
+
+ if (!ieee80211_rx_data_set_sta(&rx, &sta->sta, link_id))
+ return;
tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
if (!tid_agg_rx)
@@ -4086,10 +4141,6 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid)
};
drv_event_callback(rx.local, rx.sdata, &event);
}
- /* FIXME: statistics won't be right with this */
- link_id = sta->sta.valid_links ? ffs(sta->sta.valid_links) - 1 : 0;
- rx.link = rcu_dereference(sta->sdata->link[link_id]);
- rx.link_sta = rcu_dereference(sta->link[link_id]);
ieee80211_rx_handlers(&rx, &frames);
}
@@ -4105,7 +4156,6 @@ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid,
/* This is OK -- must be QoS data frame */
.security_idx = tid,
.seqno_idx = tid,
- .link_id = -1,
};
int i, diff;
@@ -4116,10 +4166,8 @@ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid,
sta = container_of(pubsta, struct sta_info, sta);
- rx.sta = sta;
- rx.sdata = sta->sdata;
- rx.link = &rx.sdata->deflink;
- rx.local = sta->local;
+ if (!ieee80211_rx_data_set_sta(&rx, pubsta, -1))
+ return;
rcu_read_lock();
tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
@@ -4506,15 +4554,6 @@ void ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata)
mutex_unlock(&local->sta_mtx);
}
-static bool
-ieee80211_rx_is_valid_sta_link_id(struct ieee80211_sta *sta, u8 link_id)
-{
- if (!sta->mlo)
- return false;
-
- return !!(sta->valid_links & BIT(link_id));
-}
-
static void ieee80211_rx_8023(struct ieee80211_rx_data *rx,
struct ieee80211_fast_rx *fast_rx,
int orig_len)
@@ -4625,7 +4664,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
struct sk_buff *skb = rx->skb;
struct ieee80211_hdr *hdr = (void *)skb->data;
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
- struct sta_info *sta = rx->sta;
int orig_len = skb->len;
int hdrlen = ieee80211_hdrlen(hdr->frame_control);
int snap_offs = hdrlen;
@@ -4637,7 +4675,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
u8 da[ETH_ALEN];
u8 sa[ETH_ALEN];
} addrs __aligned(2);
- struct link_sta_info *link_sta;
struct ieee80211_sta_rx_stats *stats;
/* for parallel-rx, we need to have DUP_VALIDATED, otherwise we write
@@ -4740,18 +4777,10 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
drop:
dev_kfree_skb(skb);
- if (rx->link_id >= 0) {
- link_sta = rcu_dereference(sta->link[rx->link_id]);
- if (!link_sta)
- return true;
- } else {
- link_sta = &sta->deflink;
- }
-
if (fast_rx->uses_rss)
- stats = this_cpu_ptr(link_sta->pcpu_rx_stats);
+ stats = this_cpu_ptr(rx->link_sta->pcpu_rx_stats);
else
- stats = &link_sta->rx_stats;
+ stats = &rx->link_sta->rx_stats;
stats->dropped++;
return true;
@@ -4769,8 +4798,8 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx,
struct ieee80211_local *local = rx->local;
struct ieee80211_sub_if_data *sdata = rx->sdata;
struct ieee80211_hdr *hdr = (void *)skb->data;
- struct link_sta_info *link_sta = NULL;
- struct ieee80211_link_data *link;
+ struct link_sta_info *link_sta = rx->link_sta;
+ struct ieee80211_link_data *link = rx->link;
rx->skb = skb;
@@ -4792,35 +4821,6 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx,
if (!ieee80211_accept_frame(rx))
return false;
- if (rx->link_id >= 0) {
- link = rcu_dereference(rx->sdata->link[rx->link_id]);
-
- /* we might race link removal */
- if (!link)
- return true;
- rx->link = link;
-
- if (rx->sta) {
- rx->link_sta =
- rcu_dereference(rx->sta->link[rx->link_id]);
- if (!rx->link_sta)
- return true;
- }
- } else {
- if (rx->sta)
- rx->link_sta = &rx->sta->deflink;
-
- rx->link = &sdata->deflink;
- }
-
- if (unlikely(!is_multicast_ether_addr(hdr->addr1) &&
- rx->link_id >= 0 && rx->sta && rx->sta->sta.mlo)) {
- link_sta = rcu_dereference(rx->sta->link[rx->link_id]);
-
- if (WARN_ON_ONCE(!link_sta))
- return true;
- }
-
if (!consume) {
struct skb_shared_hwtstamps *shwt;
@@ -4838,9 +4838,12 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx,
*/
shwt = skb_hwtstamps(rx->skb);
shwt->hwtstamp = skb_hwtstamps(skb)->hwtstamp;
+
+ /* Update the hdr pointer to the new skb for translation below */
+ hdr = (struct ieee80211_hdr *)rx->skb->data;
}
- if (unlikely(link_sta)) {
+ if (unlikely(rx->sta && rx->sta->sta.mlo)) {
/* translate to MLD addresses */
if (ether_addr_equal(link->conf->addr, hdr->addr1))
ether_addr_copy(hdr->addr1, rx->sdata->vif.addr);
@@ -4870,6 +4873,7 @@ static void __ieee80211_rx_handle_8023(struct ieee80211_hw *hw,
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
struct ieee80211_fast_rx *fast_rx;
struct ieee80211_rx_data rx;
+ int link_id = -1;
memset(&rx, 0, sizeof(rx));
rx.skb = skb;
@@ -4886,12 +4890,8 @@ static void __ieee80211_rx_handle_8023(struct ieee80211_hw *hw,
if (!pubsta)
goto drop;
- rx.sta = container_of(pubsta, struct sta_info, sta);
- rx.sdata = rx.sta->sdata;
-
- if (status->link_valid &&
- !ieee80211_rx_is_valid_sta_link_id(pubsta, status->link_id))
- goto drop;
+ if (status->link_valid)
+ link_id = status->link_id;
/*
* TODO: Should the frame be dropped if the right link_id is not
@@ -4900,19 +4900,8 @@ static void __ieee80211_rx_handle_8023(struct ieee80211_hw *hw,
* link_id is used only for stats purpose and updating the stats on
* the deflink is fine?
*/
- if (status->link_valid)
- rx.link_id = status->link_id;
-
- if (rx.link_id >= 0) {
- struct ieee80211_link_data *link;
-
- link = rcu_dereference(rx.sdata->link[rx.link_id]);
- if (!link)
- goto drop;
- rx.link = link;
- } else {
- rx.link = &rx.sdata->deflink;
- }
+ if (!ieee80211_rx_data_set_sta(&rx, pubsta, link_id))
+ goto drop;
fast_rx = rcu_dereference(rx.sta->fast_rx);
if (!fast_rx)
@@ -4930,6 +4919,8 @@ static bool ieee80211_rx_for_interface(struct ieee80211_rx_data *rx,
{
struct link_sta_info *link_sta;
struct ieee80211_hdr *hdr = (void *)skb->data;
+ struct sta_info *sta;
+ int link_id = -1;
/*
* Look up link station first, in case there's a
@@ -4939,24 +4930,19 @@ static bool ieee80211_rx_for_interface(struct ieee80211_rx_data *rx,
*/
link_sta = link_sta_info_get_bss(rx->sdata, hdr->addr2);
if (link_sta) {
- rx->sta = link_sta->sta;
- rx->link_id = link_sta->link_id;
+ sta = link_sta->sta;
+ link_id = link_sta->link_id;
} else {
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
- rx->sta = sta_info_get_bss(rx->sdata, hdr->addr2);
- if (rx->sta) {
- if (status->link_valid &&
- !ieee80211_rx_is_valid_sta_link_id(&rx->sta->sta,
- status->link_id))
- return false;
-
- rx->link_id = status->link_valid ? status->link_id : -1;
- } else {
- rx->link_id = -1;
- }
+ sta = sta_info_get_bss(rx->sdata, hdr->addr2);
+ if (status->link_valid)
+ link_id = status->link_id;
}
+ if (!ieee80211_rx_data_set_sta(rx, &sta->sta, link_id))
+ return false;
+
return ieee80211_prepare_and_rx_handle(rx, skb, consume);
}
@@ -5015,19 +5001,15 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
if (ieee80211_is_data(fc)) {
struct sta_info *sta, *prev_sta;
- u8 link_id = status->link_id;
+ int link_id = -1;
- if (pubsta) {
- rx.sta = container_of(pubsta, struct sta_info, sta);
- rx.sdata = rx.sta->sdata;
+ if (status->link_valid)
+ link_id = status->link_id;
- if (status->link_valid &&
- !ieee80211_rx_is_valid_sta_link_id(pubsta, link_id))
+ if (pubsta) {
+ if (!ieee80211_rx_data_set_sta(&rx, pubsta, link_id))
goto out;
- if (status->link_valid)
- rx.link_id = status->link_id;
-
/*
* In MLO connection, fetch the link_id using addr2
* when the driver does not pass link_id in status.
@@ -5045,7 +5027,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
if (!link_sta)
goto out;
- rx.link_id = link_sta->link_id;
+ ieee80211_rx_data_set_link(&rx, link_sta->link_id);
}
if (ieee80211_prepare_and_rx_handle(&rx, skb, true))
@@ -5061,30 +5043,27 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
continue;
}
- if ((status->link_valid &&
- !ieee80211_rx_is_valid_sta_link_id(&prev_sta->sta,
- link_id)) ||
- (!status->link_valid && prev_sta->sta.mlo))
+ rx.sdata = prev_sta->sdata;
+ if (!ieee80211_rx_data_set_sta(&rx, &prev_sta->sta,
+ link_id))
+ goto out;
+
+ if (!status->link_valid && prev_sta->sta.mlo)
continue;
- rx.link_id = status->link_valid ? link_id : -1;
- rx.sta = prev_sta;
- rx.sdata = prev_sta->sdata;
ieee80211_prepare_and_rx_handle(&rx, skb, false);
prev_sta = sta;
}
if (prev_sta) {
- if ((status->link_valid &&
- !ieee80211_rx_is_valid_sta_link_id(&prev_sta->sta,
- link_id)) ||
- (!status->link_valid && prev_sta->sta.mlo))
+ rx.sdata = prev_sta->sdata;
+ if (!ieee80211_rx_data_set_sta(&rx, &prev_sta->sta,
+ link_id))
goto out;
- rx.link_id = status->link_valid ? link_id : -1;
- rx.sta = prev_sta;
- rx.sdata = prev_sta->sdata;
+ if (!status->link_valid && prev_sta->sta.mlo)
+ goto out;
if (ieee80211_prepare_and_rx_handle(&rx, skb, true))
return;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 2171cd1ca807..defe97a31724 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1129,7 +1129,6 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx,
struct sk_buff *purge_skb = NULL;
if (test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state)) {
- info->flags |= IEEE80211_TX_CTL_AMPDU;
reset_agg_timer = true;
} else if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state)) {
/*
@@ -1161,7 +1160,6 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx,
if (!tid_tx) {
/* do nothing, let packet pass through */
} else if (test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state)) {
- info->flags |= IEEE80211_TX_CTL_AMPDU;
reset_agg_timer = true;
} else {
queued = true;
@@ -3677,8 +3675,7 @@ static void __ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
info->band = fast_tx->band;
info->control.vif = &sdata->vif;
info->flags = IEEE80211_TX_CTL_FIRST_FRAGMENT |
- IEEE80211_TX_CTL_DONTFRAG |
- (ampdu ? IEEE80211_TX_CTL_AMPDU : 0);
+ IEEE80211_TX_CTL_DONTFRAG;
info->control.flags = IEEE80211_TX_CTRL_FAST_XMIT |
u32_encode_bits(IEEE80211_LINK_UNSPECIFIED,
IEEE80211_TX_CTRL_MLO_LINK);
@@ -3783,6 +3780,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
struct ieee80211_tx_data tx;
ieee80211_tx_result r;
struct ieee80211_vif *vif = txq->vif;
+ int q = vif->hw_queue[txq->ac];
+ bool q_stopped;
WARN_ON_ONCE(softirq_count() == 0);
@@ -3790,17 +3789,18 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
return NULL;
begin:
- spin_lock_bh(&fq->lock);
-
- if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags) ||
- test_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txqi->flags))
- goto out;
+ spin_lock(&local->queue_stop_reason_lock);
+ q_stopped = local->queue_stop_reasons[q];
+ spin_unlock(&local->queue_stop_reason_lock);
- if (vif->txqs_stopped[txq->ac]) {
- set_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txqi->flags);
- goto out;
+ if (unlikely(q_stopped)) {
+ /* mark for waking later */
+ set_bit(IEEE80211_TXQ_DIRTY, &txqi->flags);
+ return NULL;
}
+ spin_lock_bh(&fq->lock);
+
/* Make sure fragments stay together. */
skb = __skb_dequeue(&txqi->frags);
if (unlikely(skb)) {
@@ -3810,6 +3810,9 @@ begin:
IEEE80211_SKB_CB(skb)->control.flags &=
~IEEE80211_TX_INTCFL_NEED_TXPROCESSING;
} else {
+ if (unlikely(test_bit(IEEE80211_TXQ_STOP, &txqi->flags)))
+ goto out;
+
skb = fq_tin_dequeue(fq, tin, fq_tin_dequeue_func);
}
@@ -3860,9 +3863,8 @@ begin:
}
if (test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags))
- info->flags |= IEEE80211_TX_CTL_AMPDU;
- else
- info->flags &= ~IEEE80211_TX_CTL_AMPDU;
+ info->flags |= (IEEE80211_TX_CTL_AMPDU |
+ IEEE80211_TX_CTL_DONTFRAG);
if (info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP) {
if (!ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) {
@@ -4596,8 +4598,6 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata,
info = IEEE80211_SKB_CB(skb);
memset(info, 0, sizeof(*info));
- if (tid_tx)
- info->flags |= IEEE80211_TX_CTL_AMPDU;
info->hw_queue = sdata->vif.hw_queue[queue];
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 6f5407038459..261ac667887f 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -292,22 +292,12 @@ static void wake_tx_push_queue(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
struct ieee80211_txq *queue)
{
- int q = sdata->vif.hw_queue[queue->ac];
struct ieee80211_tx_control control = {
.sta = queue->sta,
};
struct sk_buff *skb;
- unsigned long flags;
- bool q_stopped;
while (1) {
- spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
- q_stopped = local->queue_stop_reasons[q];
- spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
-
- if (q_stopped)
- break;
-
skb = ieee80211_tx_dequeue(&local->hw, queue);
if (!skb)
break;
@@ -347,8 +337,6 @@ static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac)
local_bh_disable();
spin_lock(&fq->lock);
- sdata->vif.txqs_stopped[ac] = false;
-
if (!test_bit(SDATA_STATE_RUNNING, &sdata->state))
goto out;
@@ -370,7 +358,7 @@ static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac)
if (ac != txq->ac)
continue;
- if (!test_and_clear_bit(IEEE80211_TXQ_STOP_NETIF_TX,
+ if (!test_and_clear_bit(IEEE80211_TXQ_DIRTY,
&txqi->flags))
continue;
@@ -385,7 +373,7 @@ static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac)
txqi = to_txq_info(vif->txq);
- if (!test_and_clear_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txqi->flags) ||
+ if (!test_and_clear_bit(IEEE80211_TXQ_DIRTY, &txqi->flags) ||
(ps && atomic_read(&ps->num_sta_ps)) || ac != vif->txq->ac)
goto out;
@@ -517,8 +505,6 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue,
bool refcounted)
{
struct ieee80211_local *local = hw_to_local(hw);
- struct ieee80211_sub_if_data *sdata;
- int n_acs = IEEE80211_NUM_ACS;
trace_stop_queue(local, queue, reason);
@@ -530,29 +516,7 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue,
else
local->q_stop_reasons[queue][reason]++;
- if (__test_and_set_bit(reason, &local->queue_stop_reasons[queue]))
- return;
-
- if (local->hw.queues < IEEE80211_NUM_ACS)
- n_acs = 1;
-
- rcu_read_lock();
- list_for_each_entry_rcu(sdata, &local->interfaces, list) {
- int ac;
-
- if (!sdata->dev)
- continue;
-
- for (ac = 0; ac < n_acs; ac++) {
- if (sdata->vif.hw_queue[ac] == queue ||
- sdata->vif.cab_queue == queue) {
- spin_lock(&local->fq.lock);
- sdata->vif.txqs_stopped[ac] = true;
- spin_unlock(&local->fq.lock);
- }
- }
- }
- rcu_read_unlock();
+ set_bit(reason, &local->queue_stop_reasons[queue]);
}
void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue,
diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c
index c2aae2a6d6a6..97bb4401dd3e 100644
--- a/net/mac802154/rx.c
+++ b/net/mac802154/rx.c
@@ -213,7 +213,6 @@ __ieee802154_rx_handle_packet(struct ieee802154_local *local,
ret = ieee802154_parse_frame_start(skb, &hdr);
if (ret) {
pr_debug("got invalid frame\n");
- kfree_skb(skb);
return;
}
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
index fc9e728b6333..3150f3f0c872 100644
--- a/net/mctp/af_mctp.c
+++ b/net/mctp/af_mctp.c
@@ -544,9 +544,6 @@ static int mctp_sk_init(struct sock *sk)
static void mctp_sk_close(struct sock *sk, long timeout)
{
- struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
-
- del_timer_sync(&msk->key_expiry);
sk_common_release(sk);
}
@@ -580,7 +577,19 @@ static void mctp_sk_unhash(struct sock *sk)
spin_lock_irqsave(&key->lock, fl2);
__mctp_key_remove(key, net, fl2, MCTP_TRACE_KEY_CLOSED);
}
+ sock_set_flag(sk, SOCK_DEAD);
spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
+
+ /* Since there are no more tag allocations (we have removed all of the
+ * keys), stop any pending expiry events. the timer cannot be re-queued
+ * as the sk is no longer observable
+ */
+ del_timer_sync(&msk->key_expiry);
+}
+
+static void mctp_sk_destruct(struct sock *sk)
+{
+ skb_queue_purge(&sk->sk_receive_queue);
}
static struct proto mctp_proto = {
@@ -619,6 +628,7 @@ static int mctp_pf_create(struct net *net, struct socket *sock,
return -ENOMEM;
sock_init_data(sock, sk);
+ sk->sk_destruct = mctp_sk_destruct;
rc = 0;
if (sk->sk_prot->init)
diff --git a/net/mctp/route.c b/net/mctp/route.c
index f9a80b82dc51..f51a05ec7162 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -147,6 +147,7 @@ static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk,
key->valid = true;
spin_lock_init(&key->lock);
refcount_set(&key->refs, 1);
+ sock_hold(key->sk);
return key;
}
@@ -165,6 +166,7 @@ void mctp_key_unref(struct mctp_sk_key *key)
mctp_dev_release_key(key->dev, key);
spin_unlock_irqrestore(&key->lock, flags);
+ sock_put(key->sk);
kfree(key);
}
@@ -177,6 +179,11 @@ static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
spin_lock_irqsave(&net->mctp.keys_lock, flags);
+ if (sock_flag(&msk->sk, SOCK_DEAD)) {
+ rc = -EINVAL;
+ goto out_unlock;
+ }
+
hlist_for_each_entry(tmp, &net->mctp.keys, hlist) {
if (mctp_key_match(tmp, key->local_addr, key->peer_addr,
key->tag)) {
@@ -198,6 +205,7 @@ static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
hlist_add_head(&key->sklist, &msk->keys);
}
+out_unlock:
spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
return rc;
@@ -315,8 +323,8 @@ static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb)
static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
{
+ struct mctp_sk_key *key, *any_key = NULL;
struct net *net = dev_net(skb->dev);
- struct mctp_sk_key *key;
struct mctp_sock *msk;
struct mctp_hdr *mh;
unsigned long f;
@@ -361,13 +369,11 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
* key for reassembly - we'll create a more specific
* one for future packets if required (ie, !EOM).
*/
- key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY, &f);
- if (key) {
- msk = container_of(key->sk,
+ any_key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY, &f);
+ if (any_key) {
+ msk = container_of(any_key->sk,
struct mctp_sock, sk);
- spin_unlock_irqrestore(&key->lock, f);
- mctp_key_unref(key);
- key = NULL;
+ spin_unlock_irqrestore(&any_key->lock, f);
}
}
@@ -419,14 +425,14 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
* this function.
*/
rc = mctp_key_add(key, msk);
- if (rc) {
- kfree(key);
- } else {
+ if (!rc)
trace_mctp_key_acquire(key);
- /* we don't need to release key->lock on exit */
- mctp_key_unref(key);
- }
+ /* we don't need to release key->lock on exit, so
+ * clean up here and suppress the unlock via
+ * setting to NULL
+ */
+ mctp_key_unref(key);
key = NULL;
} else {
@@ -473,6 +479,8 @@ out_unlock:
spin_unlock_irqrestore(&key->lock, f);
mctp_key_unref(key);
}
+ if (any_key)
+ mctp_key_unref(any_key);
out:
if (rc)
kfree_skb(skb);
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 35b5f806fdda..dc5165d3eec4 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1428,6 +1428,7 @@ static int mpls_dev_sysctl_register(struct net_device *dev,
free:
kfree(table);
out:
+ mdev->sysctl = NULL;
return -ENOBUFS;
}
@@ -1437,6 +1438,9 @@ static void mpls_dev_sysctl_unregister(struct net_device *dev,
struct net *net = dev_net(dev);
struct ctl_table *table;
+ if (!mdev->sysctl)
+ return;
+
table = mdev->sysctl->ctl_table_arg;
unregister_net_sysctl_table(mdev->sysctl);
kfree(table);
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 45e2a48397b9..70f0ced3ca86 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -420,6 +420,31 @@ void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
}
}
+/* if sk is ipv4 or ipv6_only allows only same-family local and remote addresses,
+ * otherwise allow any matching local/remote pair
+ */
+bool mptcp_pm_addr_families_match(const struct sock *sk,
+ const struct mptcp_addr_info *loc,
+ const struct mptcp_addr_info *rem)
+{
+ bool mptcp_is_v4 = sk->sk_family == AF_INET;
+
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ bool loc_is_v4 = loc->family == AF_INET || ipv6_addr_v4mapped(&loc->addr6);
+ bool rem_is_v4 = rem->family == AF_INET || ipv6_addr_v4mapped(&rem->addr6);
+
+ if (mptcp_is_v4)
+ return loc_is_v4 && rem_is_v4;
+
+ if (ipv6_only_sock(sk))
+ return !loc_is_v4 && !rem_is_v4;
+
+ return loc_is_v4 == rem_is_v4;
+#else
+ return mptcp_is_v4 && loc->family == AF_INET && rem->family == AF_INET;
+#endif
+}
+
void mptcp_pm_data_reset(struct mptcp_sock *msk)
{
u8 pm_type = mptcp_get_pm_type(sock_net((struct sock *)msk));
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 2ea7eae43bdb..10fe9771a852 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -998,8 +998,8 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
{
int addrlen = sizeof(struct sockaddr_in);
struct sockaddr_storage addr;
- struct mptcp_sock *msk;
struct socket *ssock;
+ struct sock *newsk;
int backlog = 1024;
int err;
@@ -1008,11 +1008,13 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
if (err)
return err;
- msk = mptcp_sk(entry->lsk->sk);
- if (!msk)
+ newsk = entry->lsk->sk;
+ if (!newsk)
return -EINVAL;
- ssock = __mptcp_nmpc_socket(msk);
+ lock_sock(newsk);
+ ssock = __mptcp_nmpc_socket(mptcp_sk(newsk));
+ release_sock(newsk);
if (!ssock)
return -EINVAL;
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index 65dcc55a8ad8..ea6ad9da7493 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -294,6 +294,13 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info)
}
sk = (struct sock *)msk;
+
+ if (!mptcp_pm_addr_families_match(sk, &addr_l, &addr_r)) {
+ GENL_SET_ERR_MSG(info, "families mismatch");
+ err = -EINVAL;
+ goto create_err;
+ }
+
lock_sock(sk);
err = __mptcp_subflow_connect(sk, &addr_l, &addr_r);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index f6f93957275b..bc6c1f62a690 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -98,7 +98,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
struct socket *ssock;
int err;
- err = mptcp_subflow_create_socket(sk, &ssock);
+ err = mptcp_subflow_create_socket(sk, sk->sk_family, &ssock);
if (err)
return err;
@@ -1662,6 +1662,8 @@ static void mptcp_set_nospace(struct sock *sk)
set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags);
}
+static int mptcp_disconnect(struct sock *sk, int flags);
+
static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msghdr *msg,
size_t len, int *copied_syn)
{
@@ -1672,9 +1674,9 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msgh
lock_sock(ssk);
msg->msg_flags |= MSG_DONTWAIT;
msk->connect_flags = O_NONBLOCK;
- msk->is_sendmsg = 1;
+ msk->fastopening = 1;
ret = tcp_sendmsg_fastopen(ssk, msg, copied_syn, len, NULL);
- msk->is_sendmsg = 0;
+ msk->fastopening = 0;
msg->msg_flags = saved_flags;
release_sock(ssk);
@@ -1688,6 +1690,8 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msgh
*/
if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR)
*copied_syn = 0;
+ } else if (ret && ret != -EINPROGRESS) {
+ mptcp_disconnect(sk, 0);
}
return ret;
@@ -2353,7 +2357,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
/* otherwise tcp will dispose of the ssk and subflow ctx */
if (ssk->sk_state == TCP_LISTEN) {
tcp_set_state(ssk, TCP_CLOSE);
- mptcp_subflow_queue_clean(ssk);
+ mptcp_subflow_queue_clean(sk, ssk);
inet_csk_listen_stop(ssk);
mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
}
@@ -2893,6 +2897,7 @@ bool __mptcp_close(struct sock *sk, long timeout)
struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);
bool do_cancel_work = false;
+ int subflows_alive = 0;
sk->sk_shutdown = SHUTDOWN_MASK;
@@ -2918,6 +2923,8 @@ cleanup:
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
bool slow = lock_sock_fast_nested(ssk);
+ subflows_alive += ssk->sk_state != TCP_CLOSE;
+
/* since the close timeout takes precedence on the fail one,
* cancel the latter
*/
@@ -2933,6 +2940,12 @@ cleanup:
}
sock_orphan(sk);
+ /* all the subflows are closed, only timeout can change the msk
+ * state, let's not keep resources busy for no reasons
+ */
+ if (subflows_alive == 0)
+ inet_sk_state_store(sk, TCP_CLOSE);
+
sock_hold(sk);
pr_debug("msk=%p state=%d", sk, sk->sk_state);
if (msk->token)
@@ -2989,6 +3002,14 @@ static int mptcp_disconnect(struct sock *sk, int flags)
{
struct mptcp_sock *msk = mptcp_sk(sk);
+ /* We are on the fastopen error path. We can't call straight into the
+ * subflows cleanup code due to lock nesting (we are already under
+ * msk->firstsocket lock). Do nothing and leave the cleanup to the
+ * caller.
+ */
+ if (msk->fastopening)
+ return 0;
+
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_stop_timer(sk);
@@ -3532,7 +3553,7 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
/* if reaching here via the fastopen/sendmsg path, the caller already
* acquired the subflow socket lock, too.
*/
- if (msk->is_sendmsg)
+ if (msk->fastopening)
err = __inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags, 1);
else
err = inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 955fb3d88eb3..601469249da8 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -295,7 +295,7 @@ struct mptcp_sock {
u8 recvmsg_inq:1,
cork:1,
nodelay:1,
- is_sendmsg:1;
+ fastopening:1;
int connect_flags;
struct work_struct work;
struct sk_buff *ooo_last_skb;
@@ -628,7 +628,7 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow);
void __mptcp_subflow_send_ack(struct sock *ssk);
void mptcp_subflow_reset(struct sock *ssk);
-void mptcp_subflow_queue_clean(struct sock *ssk);
+void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk);
void mptcp_sock_graft(struct sock *sk, struct socket *parent);
struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
bool __mptcp_close(struct sock *sk, long timeout);
@@ -641,7 +641,8 @@ bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
/* called with sk socket lock held */
int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
const struct mptcp_addr_info *remote);
-int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock);
+int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
+ struct socket **new_sock);
void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
struct sockaddr_storage *addr,
unsigned short family);
@@ -776,6 +777,9 @@ int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info,
int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info,
bool require_family,
struct mptcp_pm_addr_entry *entry);
+bool mptcp_pm_addr_families_match(const struct sock *sk,
+ const struct mptcp_addr_info *loc,
+ const struct mptcp_addr_info *rem);
void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side);
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index d4b1e6ec1b36..7f2c3727ab23 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -760,14 +760,21 @@ static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname,
static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
+ struct sock *sk = (struct sock *)msk;
struct socket *sock;
+ int ret = -EINVAL;
/* Limit to first subflow, before the connection establishment */
+ lock_sock(sk);
sock = __mptcp_nmpc_socket(msk);
if (!sock)
- return -EINVAL;
+ goto unlock;
- return tcp_setsockopt(sock->sk, level, optname, optval, optlen);
+ ret = tcp_setsockopt(sock->sk, level, optname, optval, optlen);
+
+unlock:
+ release_sock(sk);
+ return ret;
}
static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index d1d32a66ae3f..32904c76c6a1 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1399,6 +1399,7 @@ void __mptcp_error_report(struct sock *sk)
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
int err = sock_error(ssk);
+ int ssk_state;
if (!err)
continue;
@@ -1409,7 +1410,14 @@ void __mptcp_error_report(struct sock *sk)
if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk))
continue;
- inet_sk_state_store(sk, inet_sk_state_load(ssk));
+ /* We need to propagate only transition to CLOSE state.
+ * Orphaned socket will see such state change via
+ * subflow_sched_work_if_closed() and that path will properly
+ * destroy the msk as needed.
+ */
+ ssk_state = inet_sk_state_load(ssk);
+ if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
+ inet_sk_state_store(sk, ssk_state);
sk->sk_err = -err;
/* This barrier is coupled with smp_rmb() in mptcp_poll() */
@@ -1547,7 +1555,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
if (!mptcp_is_fully_established(sk))
goto err_out;
- err = mptcp_subflow_create_socket(sk, &sf);
+ err = mptcp_subflow_create_socket(sk, loc->family, &sf);
if (err)
goto err_out;
@@ -1660,7 +1668,9 @@ static void mptcp_subflow_ops_undo_override(struct sock *ssk)
#endif
ssk->sk_prot = &tcp_prot;
}
-int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
+
+int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
+ struct socket **new_sock)
{
struct mptcp_subflow_context *subflow;
struct net *net = sock_net(sk);
@@ -1673,12 +1683,11 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
if (unlikely(!sk->sk_socket))
return -EINVAL;
- err = sock_create_kern(net, sk->sk_family, SOCK_STREAM, IPPROTO_TCP,
- &sf);
+ err = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP, &sf);
if (err)
return err;
- lock_sock(sf->sk);
+ lock_sock_nested(sf->sk, SINGLE_DEPTH_NESTING);
/* the newly created socket has to be in the same cgroup as its parent */
mptcp_attach_cgroup(sk, sf->sk);
@@ -1791,7 +1800,7 @@ static void subflow_state_change(struct sock *sk)
}
}
-void mptcp_subflow_queue_clean(struct sock *listener_ssk)
+void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk)
{
struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue;
struct mptcp_sock *msk, *next, *head = NULL;
@@ -1840,8 +1849,23 @@ void mptcp_subflow_queue_clean(struct sock *listener_ssk)
do_cancel_work = __mptcp_close(sk, 0);
release_sock(sk);
- if (do_cancel_work)
+ if (do_cancel_work) {
+ /* lockdep will report a false positive ABBA deadlock
+ * between cancel_work_sync and the listener socket.
+ * The involved locks belong to different sockets WRT
+ * the existing AB chain.
+ * Using a per socket key is problematic as key
+ * deregistration requires process context and must be
+ * performed at socket disposal time, in atomic
+ * context.
+ * Just tell lockdep to consider the listener socket
+ * released here.
+ */
+ mutex_release(&listener_sk->sk_lock.dep_map, _RET_IP_);
mptcp_cancel_work(sk);
+ mutex_acquire(&listener_sk->sk_lock.dep_map,
+ SINGLE_DEPTH_NESTING, 0, _RET_IP_);
+ }
sock_put(sk);
}
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index a8ce04a4bb72..e4fa00abde6a 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -308,8 +308,8 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
return -IPSET_ERR_BITMAP_RANGE;
pr_debug("mask_bits %u, netmask %u\n", mask_bits, netmask);
- hosts = 2 << (32 - netmask - 1);
- elements = 2 << (netmask - mask_bits - 1);
+ hosts = 2U << (32 - netmask - 1);
+ elements = 2UL << (netmask - mask_bits - 1);
}
if (elements > IPSET_BITMAP_MAX_RANGE + 1)
return -IPSET_ERR_BITMAP_RANGE_SIZE;
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index e7ba5b6dd2b7..46ebee9400da 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1698,9 +1698,10 @@ call_ad(struct net *net, struct sock *ctnl, struct sk_buff *skb,
ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
ip_set_unlock(set);
retried = true;
- } while (ret == -EAGAIN &&
- set->variant->resize &&
- (ret = set->variant->resize(set, retried)) == 0);
+ } while (ret == -ERANGE ||
+ (ret == -EAGAIN &&
+ set->variant->resize &&
+ (ret = set->variant->resize(set, retried)) == 0));
if (!ret || (ret == -IPSET_ERR_EXIST && eexist))
return 0;
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index e30513cefd90..c9f4e3859663 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -100,11 +100,11 @@ static int
hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ip4 *h = set->data;
+ struct hash_ip4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ip4_elem e = { 0 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip = 0, ip_to = 0, hosts;
+ u32 ip = 0, ip_to = 0, hosts, i = 0;
int ret = 0;
if (tb[IPSET_ATTR_LINENO])
@@ -149,14 +149,14 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1);
- /* 64bit division is not allowed on 32bit */
- if (((u64)ip_to - ip + 1) >> (32 - h->netmask) > IPSET_MAX_RANGE)
- return -ERANGE;
-
if (retried)
ip = ntohl(h->next.ip);
- for (; ip <= ip_to;) {
+ for (; ip <= ip_to; i++) {
e.ip = htonl(ip);
+ if (i > IPSET_MAX_RANGE) {
+ hash_ip4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index 153de3457423..a22ec1a6f6ec 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -97,11 +97,11 @@ static int
hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ipmark4 *h = set->data;
+ struct hash_ipmark4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipmark4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip, ip_to = 0;
+ u32 ip, ip_to = 0, i = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -148,13 +148,14 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
ip_set_mask_from_to(ip, ip_to, cidr);
}
- if (((u64)ip_to - ip + 1) > IPSET_MAX_RANGE)
- return -ERANGE;
-
if (retried)
ip = ntohl(h->next.ip);
- for (; ip <= ip_to; ip++) {
+ for (; ip <= ip_to; ip++, i++) {
e.ip = htonl(ip);
+ if (i > IPSET_MAX_RANGE) {
+ hash_ipmark4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 2ffbd0b78a8c..e977b5a9c48d 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -112,11 +112,11 @@ static int
hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ipport4 *h = set->data;
+ struct hash_ipport4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipport4_elem e = { .ip = 0 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip, ip_to = 0, p = 0, port, port_to;
+ u32 ip, ip_to = 0, p = 0, port, port_to, i = 0;
bool with_ports = false;
int ret;
@@ -184,17 +184,18 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to);
}
- if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
- return -ERANGE;
-
if (retried)
ip = ntohl(h->next.ip);
for (; ip <= ip_to; ip++) {
p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
: port;
- for (; p <= port_to; p++) {
+ for (; p <= port_to; p++, i++) {
e.ip = htonl(ip);
e.port = htons(p);
+ if (i > IPSET_MAX_RANGE) {
+ hash_ipport4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 334fb1ad0e86..39a01934b153 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -108,11 +108,11 @@ static int
hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ipportip4 *h = set->data;
+ struct hash_ipportip4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportip4_elem e = { .ip = 0 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip, ip_to = 0, p = 0, port, port_to;
+ u32 ip, ip_to = 0, p = 0, port, port_to, i = 0;
bool with_ports = false;
int ret;
@@ -180,17 +180,18 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to);
}
- if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
- return -ERANGE;
-
if (retried)
ip = ntohl(h->next.ip);
for (; ip <= ip_to; ip++) {
p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
: port;
- for (; p <= port_to; p++) {
+ for (; p <= port_to; p++, i++) {
e.ip = htonl(ip);
e.port = htons(p);
+ if (i > IPSET_MAX_RANGE) {
+ hash_ipportip4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 7df94f437f60..5c6de605a9fb 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -160,12 +160,12 @@ static int
hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ipportnet4 *h = set->data;
+ struct hash_ipportnet4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, p = 0, port, port_to;
- u32 ip2_from = 0, ip2_to = 0, ip2;
+ u32 ip2_from = 0, ip2_to = 0, ip2, i = 0;
bool with_ports = false;
u8 cidr;
int ret;
@@ -253,9 +253,6 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to);
}
- if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
- return -ERANGE;
-
ip2_to = ip2_from;
if (tb[IPSET_ATTR_IP2_TO]) {
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to);
@@ -282,9 +279,15 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
for (; p <= port_to; p++) {
e.port = htons(p);
do {
+ i++;
e.ip2 = htonl(ip2);
ip2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr);
e.cidr = cidr - 1;
+ if (i > IPSET_MAX_RANGE) {
+ hash_ipportnet4_data_next(&h->next,
+ &e);
+ return -ERANGE;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 1422739d9aa2..ce0a9ce5a91f 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -136,11 +136,11 @@ static int
hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_net4 *h = set->data;
+ struct hash_net4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_net4_elem e = { .cidr = HOST_MASK };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip = 0, ip_to = 0, ipn, n = 0;
+ u32 ip = 0, ip_to = 0, i = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -188,19 +188,16 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
if (ip + UINT_MAX == ip_to)
return -IPSET_ERR_HASH_RANGE;
}
- ipn = ip;
- do {
- ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr);
- n++;
- } while (ipn++ < ip_to);
-
- if (n > IPSET_MAX_RANGE)
- return -ERANGE;
if (retried)
ip = ntohl(h->next.ip);
do {
+ i++;
e.ip = htonl(ip);
+ if (i > IPSET_MAX_RANGE) {
+ hash_net4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 9810f5bf63f5..031073286236 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -202,7 +202,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip = 0, ip_to = 0, ipn, n = 0;
+ u32 ip = 0, ip_to = 0, i = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -256,19 +256,16 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip, ip_to, e.cidr);
}
- ipn = ip;
- do {
- ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr);
- n++;
- } while (ipn++ < ip_to);
-
- if (n > IPSET_MAX_RANGE)
- return -ERANGE;
if (retried)
ip = ntohl(h->next.ip);
do {
+ i++;
e.ip = htonl(ip);
+ if (i > IPSET_MAX_RANGE) {
+ hash_netiface4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
ret = adtfn(set, &e, &ext, &ext, flags);
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index cdfb78c6e0d3..8fbe649c9dd3 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -166,13 +166,12 @@ static int
hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_netnet4 *h = set->data;
+ struct hash_netnet4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0;
- u32 ip2 = 0, ip2_from = 0, ip2_to = 0, ipn;
- u64 n = 0, m = 0;
+ u32 ip2 = 0, ip2_from = 0, ip2_to = 0, i = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -248,19 +247,6 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
}
- ipn = ip;
- do {
- ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]);
- n++;
- } while (ipn++ < ip_to);
- ipn = ip2_from;
- do {
- ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]);
- m++;
- } while (ipn++ < ip2_to);
-
- if (n*m > IPSET_MAX_RANGE)
- return -ERANGE;
if (retried) {
ip = ntohl(h->next.ip[0]);
@@ -273,7 +259,12 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
e.ip[0] = htonl(ip);
ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
do {
+ i++;
e.ip[1] = htonl(ip2);
+ if (i > IPSET_MAX_RANGE) {
+ hash_netnet4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ip2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]);
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 09cf72eb37f8..d1a0628df4ef 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -154,12 +154,11 @@ static int
hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_netport4 *h = set->data;
+ struct hash_netport4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 port, port_to, p = 0, ip = 0, ip_to = 0, ipn;
- u64 n = 0;
+ u32 port, port_to, p = 0, ip = 0, ip_to = 0, i = 0;
bool with_ports = false;
u8 cidr;
int ret;
@@ -236,14 +235,6 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip, ip_to, e.cidr + 1);
}
- ipn = ip;
- do {
- ipn = ip_set_range_to_cidr(ipn, ip_to, &cidr);
- n++;
- } while (ipn++ < ip_to);
-
- if (n*(port_to - port + 1) > IPSET_MAX_RANGE)
- return -ERANGE;
if (retried) {
ip = ntohl(h->next.ip);
@@ -255,8 +246,12 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
e.ip = htonl(ip);
ip = ip_set_range_to_cidr(ip, ip_to, &cidr);
e.cidr = cidr - 1;
- for (; p <= port_to; p++) {
+ for (; p <= port_to; p++, i++) {
e.port = htons(p);
+ if (i > IPSET_MAX_RANGE) {
+ hash_netport4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index 19bcdb3141f6..005a7ce87217 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -173,17 +173,26 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
+static u32
+hash_netportnet4_range_to_cidr(u32 from, u32 to, u8 *cidr)
+{
+ if (from == 0 && to == UINT_MAX) {
+ *cidr = 0;
+ return to;
+ }
+ return ip_set_range_to_cidr(from, to, cidr);
+}
+
static int
hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_netportnet4 *h = set->data;
+ struct hash_netportnet4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netportnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, p = 0, port, port_to;
- u32 ip2_from = 0, ip2_to = 0, ip2, ipn;
- u64 n = 0, m = 0;
+ u32 ip2_from = 0, ip2_to = 0, ip2, i = 0;
bool with_ports = false;
int ret;
@@ -285,19 +294,6 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
}
- ipn = ip;
- do {
- ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]);
- n++;
- } while (ipn++ < ip_to);
- ipn = ip2_from;
- do {
- ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]);
- m++;
- } while (ipn++ < ip2_to);
-
- if (n*m*(port_to - port + 1) > IPSET_MAX_RANGE)
- return -ERANGE;
if (retried) {
ip = ntohl(h->next.ip[0]);
@@ -310,13 +306,19 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
do {
e.ip[0] = htonl(ip);
- ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
+ ip = hash_netportnet4_range_to_cidr(ip, ip_to, &e.cidr[0]);
for (; p <= port_to; p++) {
e.port = htons(p);
do {
+ i++;
e.ip[1] = htonl(ip2);
- ip2 = ip_set_range_to_cidr(ip2, ip2_to,
- &e.cidr[1]);
+ if (i > IPSET_MAX_RANGE) {
+ hash_netportnet4_data_next(&h->next,
+ &e);
+ return -ERANGE;
+ }
+ ip2 = hash_netportnet4_range_to_cidr(ip2,
+ ip2_to, &e.cidr[1]);
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 99323fb12d0f..ccef340be575 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -141,6 +141,7 @@ unsigned int nf_confirm(void *priv,
struct nf_conn *ct;
bool seqadj_needed;
__be16 frag_off;
+ int start;
u8 pnum;
ct = nf_ct_get(skb, &ctinfo);
@@ -163,9 +164,11 @@ unsigned int nf_confirm(void *priv,
break;
case NFPROTO_IPV6:
pnum = ipv6_hdr(skb)->nexthdr;
- protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, &frag_off);
- if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
+ start = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, &frag_off);
+ if (start < 0 || (frag_off & htons(~0x7)) != 0)
return nf_conntrack_confirm(skb);
+
+ protoff = start;
break;
default:
return nf_conntrack_confirm(skb);
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index d88b92a8ffca..011d414038ea 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -27,22 +27,16 @@
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_timeout.h>
-/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
- closely. They're more complex. --RR
-
- And so for me for SCTP :D -Kiran */
-
static const char *const sctp_conntrack_names[] = {
- "NONE",
- "CLOSED",
- "COOKIE_WAIT",
- "COOKIE_ECHOED",
- "ESTABLISHED",
- "SHUTDOWN_SENT",
- "SHUTDOWN_RECD",
- "SHUTDOWN_ACK_SENT",
- "HEARTBEAT_SENT",
- "HEARTBEAT_ACKED",
+ [SCTP_CONNTRACK_NONE] = "NONE",
+ [SCTP_CONNTRACK_CLOSED] = "CLOSED",
+ [SCTP_CONNTRACK_COOKIE_WAIT] = "COOKIE_WAIT",
+ [SCTP_CONNTRACK_COOKIE_ECHOED] = "COOKIE_ECHOED",
+ [SCTP_CONNTRACK_ESTABLISHED] = "ESTABLISHED",
+ [SCTP_CONNTRACK_SHUTDOWN_SENT] = "SHUTDOWN_SENT",
+ [SCTP_CONNTRACK_SHUTDOWN_RECD] = "SHUTDOWN_RECD",
+ [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = "SHUTDOWN_ACK_SENT",
+ [SCTP_CONNTRACK_HEARTBEAT_SENT] = "HEARTBEAT_SENT",
};
#define SECS * HZ
@@ -54,13 +48,11 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = {
[SCTP_CONNTRACK_CLOSED] = 10 SECS,
[SCTP_CONNTRACK_COOKIE_WAIT] = 3 SECS,
[SCTP_CONNTRACK_COOKIE_ECHOED] = 3 SECS,
- [SCTP_CONNTRACK_ESTABLISHED] = 5 DAYS,
+ [SCTP_CONNTRACK_ESTABLISHED] = 210 SECS,
[SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000,
[SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000,
[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS,
[SCTP_CONNTRACK_HEARTBEAT_SENT] = 30 SECS,
- [SCTP_CONNTRACK_HEARTBEAT_ACKED] = 210 SECS,
- [SCTP_CONNTRACK_DATA_SENT] = 30 SECS,
};
#define SCTP_FLAG_HEARTBEAT_VTAG_FAILED 1
@@ -74,8 +66,6 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = {
#define sSR SCTP_CONNTRACK_SHUTDOWN_RECD
#define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT
#define sHS SCTP_CONNTRACK_HEARTBEAT_SENT
-#define sHA SCTP_CONNTRACK_HEARTBEAT_ACKED
-#define sDS SCTP_CONNTRACK_DATA_SENT
#define sIV SCTP_CONNTRACK_MAX
/*
@@ -98,10 +88,6 @@ SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite
CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of
the SHUTDOWN chunk. Connection is closed.
HEARTBEAT_SENT - We have seen a HEARTBEAT in a new flow.
-HEARTBEAT_ACKED - We have seen a HEARTBEAT-ACK/DATA/SACK in the direction
- opposite to that of the HEARTBEAT/DATA chunk. Secondary connection
- is established.
-DATA_SENT - We have seen a DATA/SACK in a new flow.
*/
/* TODO
@@ -115,38 +101,36 @@ cookie echoed to closed.
*/
/* SCTP conntrack state transitions */
-static const u8 sctp_conntracks[2][12][SCTP_CONNTRACK_MAX] = {
+static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
{
/* ORIGINAL */
-/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sDS */
-/* init */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW, sHA, sCW},
-/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA, sCL},
-/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
-/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL, sSS, sCL},
-/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA, sHA, sSA},
-/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA, sCL},/* Can't have Stale cookie*/
-/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL, sHA, sCL},/* 5.2.4 - Big TODO */
-/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA, sCL},/* Can't come in orig dir */
-/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL, sHA, sCL},
-/* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sDS},
-/* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sDS},
-/* data/sack */ {sDS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sDS}
+/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */
+/* init */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW},
+/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},
+/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
+/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL},
+/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA},
+/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't have Stale cookie*/
+/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL},/* 5.2.4 - Big TODO */
+/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't come in orig dir */
+/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL},
+/* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS},
+/* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS},
},
{
/* REPLY */
-/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sDS */
-/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA, sIV},/* INIT in sCL Big TODO */
-/* init_ack */ {sIV, sCW, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA, sIV},
-/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV, sCL, sIV},
-/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV, sSR, sIV},
-/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV, sHA, sIV},
-/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV, sHA, sIV},
-/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA, sIV},/* Can't come in reply dir */
-/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV, sHA, sIV},
-/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV, sHA, sIV},
-/* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sHA},
-/* heartbeat_ack*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHA, sHA, sHA},
-/* data/sack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHA, sHA, sHA},
+/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */
+/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV},/* INIT in sCL Big TODO */
+/* init_ack */ {sIV, sCW, sCW, sCE, sES, sSS, sSR, sSA, sIV},
+/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV},
+/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV},
+/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV},
+/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV},
+/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV},/* Can't come in reply dir */
+/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV},
+/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV},
+/* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS},
+/* heartbeat_ack*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sES},
}
};
@@ -158,6 +142,7 @@ static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
}
#endif
+/* do_basic_checks ensures sch->length > 0, do not use before */
#define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) \
for ((offset) = (dataoff) + sizeof(struct sctphdr), (count) = 0; \
(offset) < (skb)->len && \
@@ -258,11 +243,6 @@ static int sctp_new_state(enum ip_conntrack_dir dir,
pr_debug("SCTP_CID_HEARTBEAT_ACK");
i = 10;
break;
- case SCTP_CID_DATA:
- case SCTP_CID_SACK:
- pr_debug("SCTP_CID_DATA/SACK");
- i = 11;
- break;
default:
/* Other chunks like DATA or SACK do not change the state */
pr_debug("Unknown chunk type, Will stay in %s\n",
@@ -316,9 +296,7 @@ sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
ih->init_tag);
ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = ih->init_tag;
- } else if (sch->type == SCTP_CID_HEARTBEAT ||
- sch->type == SCTP_CID_DATA ||
- sch->type == SCTP_CID_SACK) {
+ } else if (sch->type == SCTP_CID_HEARTBEAT) {
pr_debug("Setting vtag %x for secondary conntrack\n",
sh->vtag);
ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag;
@@ -404,19 +382,19 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
if (!sctp_new(ct, skb, sh, dataoff))
return -NF_ACCEPT;
- } else {
- /* Check the verification tag (Sec 8.5) */
- if (!test_bit(SCTP_CID_INIT, map) &&
- !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) &&
- !test_bit(SCTP_CID_COOKIE_ECHO, map) &&
- !test_bit(SCTP_CID_ABORT, map) &&
- !test_bit(SCTP_CID_SHUTDOWN_ACK, map) &&
- !test_bit(SCTP_CID_HEARTBEAT, map) &&
- !test_bit(SCTP_CID_HEARTBEAT_ACK, map) &&
- sh->vtag != ct->proto.sctp.vtag[dir]) {
- pr_debug("Verification tag check failed\n");
- goto out;
- }
+ }
+
+ /* Check the verification tag (Sec 8.5) */
+ if (!test_bit(SCTP_CID_INIT, map) &&
+ !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) &&
+ !test_bit(SCTP_CID_COOKIE_ECHO, map) &&
+ !test_bit(SCTP_CID_ABORT, map) &&
+ !test_bit(SCTP_CID_SHUTDOWN_ACK, map) &&
+ !test_bit(SCTP_CID_HEARTBEAT, map) &&
+ !test_bit(SCTP_CID_HEARTBEAT_ACK, map) &&
+ sh->vtag != ct->proto.sctp.vtag[dir]) {
+ pr_debug("Verification tag check failed\n");
+ goto out;
}
old_state = new_state = SCTP_CONNTRACK_NONE;
@@ -424,22 +402,29 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
/* Special cases of Verification tag check (Sec 8.5.1) */
if (sch->type == SCTP_CID_INIT) {
- /* Sec 8.5.1 (A) */
+ /* (A) vtag MUST be zero */
if (sh->vtag != 0)
goto out_unlock;
} else if (sch->type == SCTP_CID_ABORT) {
- /* Sec 8.5.1 (B) */
- if (sh->vtag != ct->proto.sctp.vtag[dir] &&
- sh->vtag != ct->proto.sctp.vtag[!dir])
+ /* (B) vtag MUST match own vtag if T flag is unset OR
+ * MUST match peer's vtag if T flag is set
+ */
+ if ((!(sch->flags & SCTP_CHUNK_FLAG_T) &&
+ sh->vtag != ct->proto.sctp.vtag[dir]) ||
+ ((sch->flags & SCTP_CHUNK_FLAG_T) &&
+ sh->vtag != ct->proto.sctp.vtag[!dir]))
goto out_unlock;
} else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
- /* Sec 8.5.1 (C) */
- if (sh->vtag != ct->proto.sctp.vtag[dir] &&
- sh->vtag != ct->proto.sctp.vtag[!dir] &&
- sch->flags & SCTP_CHUNK_FLAG_T)
+ /* (C) vtag MUST match own vtag if T flag is unset OR
+ * MUST match peer's vtag if T flag is set
+ */
+ if ((!(sch->flags & SCTP_CHUNK_FLAG_T) &&
+ sh->vtag != ct->proto.sctp.vtag[dir]) ||
+ ((sch->flags & SCTP_CHUNK_FLAG_T) &&
+ sh->vtag != ct->proto.sctp.vtag[!dir]))
goto out_unlock;
} else if (sch->type == SCTP_CID_COOKIE_ECHO) {
- /* Sec 8.5.1 (D) */
+ /* (D) vtag must be same as init_vtag as found in INIT_ACK */
if (sh->vtag != ct->proto.sctp.vtag[dir])
goto out_unlock;
} else if (sch->type == SCTP_CID_HEARTBEAT) {
@@ -476,11 +461,6 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
} else if (ct->proto.sctp.flags & SCTP_FLAG_HEARTBEAT_VTAG_FAILED) {
ct->proto.sctp.flags &= ~SCTP_FLAG_HEARTBEAT_VTAG_FAILED;
}
- } else if (sch->type == SCTP_CID_DATA || sch->type == SCTP_CID_SACK) {
- if (ct->proto.sctp.vtag[dir] == 0) {
- pr_debug("Setting vtag %x for dir %d\n", sh->vtag, dir);
- ct->proto.sctp.vtag[dir] = sh->vtag;
- }
}
old_state = ct->proto.sctp.state;
@@ -518,8 +498,12 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
}
ct->proto.sctp.state = new_state;
- if (old_state != new_state)
+ if (old_state != new_state) {
nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
+ if (new_state == SCTP_CONNTRACK_ESTABLISHED &&
+ !test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
+ nf_conntrack_event_cache(IPCT_ASSURED, ct);
+ }
}
spin_unlock_bh(&ct->lock);
@@ -533,14 +517,6 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);
- if (old_state == SCTP_CONNTRACK_COOKIE_ECHOED &&
- dir == IP_CT_DIR_REPLY &&
- new_state == SCTP_CONNTRACK_ESTABLISHED) {
- pr_debug("Setting assured bit\n");
- set_bit(IPS_ASSURED_BIT, &ct->status);
- nf_conntrack_event_cache(IPCT_ASSURED, ct);
- }
-
return NF_ACCEPT;
out_unlock:
@@ -701,7 +677,6 @@ sctp_timeout_nla_policy[CTA_TIMEOUT_SCTP_MAX+1] = {
[CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT] = { .type = NLA_U32 },
[CTA_TIMEOUT_SCTP_HEARTBEAT_SENT] = { .type = NLA_U32 },
[CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED] = { .type = NLA_U32 },
- [CTA_TIMEOUT_SCTP_DATA_SENT] = { .type = NLA_U32 },
};
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 656631083177..3ac1af6f59fc 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1068,6 +1068,13 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
ct->proto.tcp.last_flags |=
IP_CT_EXP_CHALLENGE_ACK;
}
+
+ /* possible challenge ack reply to syn */
+ if (old_state == TCP_CONNTRACK_SYN_SENT &&
+ index == TCP_ACK_SET &&
+ dir == IP_CT_DIR_REPLY)
+ ct->proto.tcp.last_ack = ntohl(th->ack_seq);
+
spin_unlock_bh(&ct->lock);
nf_ct_l4proto_log_invalid(skb, ct, state,
"packet (index %d) in dir %d ignored, state %s",
@@ -1193,6 +1200,14 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
* segments we ignored. */
goto in_window;
}
+
+ /* Reset in response to a challenge-ack we let through earlier */
+ if (old_state == TCP_CONNTRACK_SYN_SENT &&
+ ct->proto.tcp.last_index == TCP_ACK_SET &&
+ ct->proto.tcp.last_dir == IP_CT_DIR_REPLY &&
+ ntohl(th->seq) == ct->proto.tcp.last_ack)
+ goto in_window;
+
break;
default:
/* Keep compilers happy. */
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 0250725e38a4..460294bd4b60 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -601,8 +601,6 @@ enum nf_ct_sysctl_index {
NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_RECD,
NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT,
NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_SENT,
- NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_ACKED,
- NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_DATA_SENT,
#endif
#ifdef CONFIG_NF_CT_PROTO_DCCP
NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST,
@@ -887,18 +885,6 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_ACKED] = {
- .procname = "nf_conntrack_sctp_timeout_heartbeat_acked",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_DATA_SENT] = {
- .procname = "nf_conntrack_sctp_timeout_data_sent",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
#endif
#ifdef CONFIG_NF_CT_PROTO_DCCP
[NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST] = {
@@ -1042,8 +1028,6 @@ static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net,
XASSIGN(SHUTDOWN_RECD, sn);
XASSIGN(SHUTDOWN_ACK_SENT, sn);
XASSIGN(HEARTBEAT_SENT, sn);
- XASSIGN(HEARTBEAT_ACKED, sn);
- XASSIGN(DATA_SENT, sn);
#undef XASSIGN
#endif
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 832b881f7c17..8c09e4d12ac1 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -465,8 +465,9 @@ static int nft_delrule_by_chain(struct nft_ctx *ctx)
return 0;
}
-static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
- struct nft_set *set)
+static int __nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
+ struct nft_set *set,
+ const struct nft_set_desc *desc)
{
struct nft_trans *trans;
@@ -474,17 +475,28 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
if (trans == NULL)
return -ENOMEM;
- if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) {
+ if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] && !desc) {
nft_trans_set_id(trans) =
ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID]));
nft_activate_next(ctx->net, set);
}
nft_trans_set(trans) = set;
+ if (desc) {
+ nft_trans_set_update(trans) = true;
+ nft_trans_set_gc_int(trans) = desc->gc_int;
+ nft_trans_set_timeout(trans) = desc->timeout;
+ }
nft_trans_commit_list_add_tail(ctx->net, trans);
return 0;
}
+static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
+ struct nft_set *set)
+{
+ return __nft_trans_set_add(ctx, msg_type, set, NULL);
+}
+
static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
{
int err;
@@ -3780,8 +3792,7 @@ static bool nft_set_ops_candidate(const struct nft_set_type *type, u32 flags)
static const struct nft_set_ops *
nft_select_set_ops(const struct nft_ctx *ctx,
const struct nlattr * const nla[],
- const struct nft_set_desc *desc,
- enum nft_set_policies policy)
+ const struct nft_set_desc *desc)
{
struct nftables_pernet *nft_net = nft_pernet(ctx->net);
const struct nft_set_ops *ops, *bops;
@@ -3810,7 +3821,7 @@ nft_select_set_ops(const struct nft_ctx *ctx,
if (!ops->estimate(desc, flags, &est))
continue;
- switch (policy) {
+ switch (desc->policy) {
case NFT_SET_POL_PERFORMANCE:
if (est.lookup < best.lookup)
break;
@@ -4045,8 +4056,10 @@ static int nf_tables_fill_set_concat(struct sk_buff *skb,
static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
const struct nft_set *set, u16 event, u16 flags)
{
- struct nlmsghdr *nlh;
+ u64 timeout = READ_ONCE(set->timeout);
+ u32 gc_int = READ_ONCE(set->gc_int);
u32 portid = ctx->portid;
+ struct nlmsghdr *nlh;
struct nlattr *nest;
u32 seq = ctx->seq;
int i;
@@ -4082,13 +4095,13 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
nla_put_be32(skb, NFTA_SET_OBJ_TYPE, htonl(set->objtype)))
goto nla_put_failure;
- if (set->timeout &&
+ if (timeout &&
nla_put_be64(skb, NFTA_SET_TIMEOUT,
- nf_jiffies64_to_msecs(set->timeout),
+ nf_jiffies64_to_msecs(timeout),
NFTA_SET_PAD))
goto nla_put_failure;
- if (set->gc_int &&
- nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(set->gc_int)))
+ if (gc_int &&
+ nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(gc_int)))
goto nla_put_failure;
if (set->policy != NFT_SET_POL_PERFORMANCE) {
@@ -4389,15 +4402,94 @@ static int nf_tables_set_desc_parse(struct nft_set_desc *desc,
return err;
}
+static int nft_set_expr_alloc(struct nft_ctx *ctx, struct nft_set *set,
+ const struct nlattr * const *nla,
+ struct nft_expr **exprs, int *num_exprs,
+ u32 flags)
+{
+ struct nft_expr *expr;
+ int err, i;
+
+ if (nla[NFTA_SET_EXPR]) {
+ expr = nft_set_elem_expr_alloc(ctx, set, nla[NFTA_SET_EXPR]);
+ if (IS_ERR(expr)) {
+ err = PTR_ERR(expr);
+ goto err_set_expr_alloc;
+ }
+ exprs[0] = expr;
+ (*num_exprs)++;
+ } else if (nla[NFTA_SET_EXPRESSIONS]) {
+ struct nlattr *tmp;
+ int left;
+
+ if (!(flags & NFT_SET_EXPR)) {
+ err = -EINVAL;
+ goto err_set_expr_alloc;
+ }
+ i = 0;
+ nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) {
+ if (i == NFT_SET_EXPR_MAX) {
+ err = -E2BIG;
+ goto err_set_expr_alloc;
+ }
+ if (nla_type(tmp) != NFTA_LIST_ELEM) {
+ err = -EINVAL;
+ goto err_set_expr_alloc;
+ }
+ expr = nft_set_elem_expr_alloc(ctx, set, tmp);
+ if (IS_ERR(expr)) {
+ err = PTR_ERR(expr);
+ goto err_set_expr_alloc;
+ }
+ exprs[i++] = expr;
+ (*num_exprs)++;
+ }
+ }
+
+ return 0;
+
+err_set_expr_alloc:
+ for (i = 0; i < *num_exprs; i++)
+ nft_expr_destroy(ctx, exprs[i]);
+
+ return err;
+}
+
+static bool nft_set_is_same(const struct nft_set *set,
+ const struct nft_set_desc *desc,
+ struct nft_expr *exprs[], u32 num_exprs, u32 flags)
+{
+ int i;
+
+ if (set->ktype != desc->ktype ||
+ set->dtype != desc->dtype ||
+ set->flags != flags ||
+ set->klen != desc->klen ||
+ set->dlen != desc->dlen ||
+ set->field_count != desc->field_count ||
+ set->num_exprs != num_exprs)
+ return false;
+
+ for (i = 0; i < desc->field_count; i++) {
+ if (set->field_len[i] != desc->field_len[i])
+ return false;
+ }
+
+ for (i = 0; i < num_exprs; i++) {
+ if (set->exprs[i]->ops != exprs[i]->ops)
+ return false;
+ }
+
+ return true;
+}
+
static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nla[])
{
- u32 ktype, dtype, flags, policy, gc_int, objtype;
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_next(info->net);
u8 family = info->nfmsg->nfgen_family;
const struct nft_set_ops *ops;
- struct nft_expr *expr = NULL;
struct net *net = info->net;
struct nft_set_desc desc;
struct nft_table *table;
@@ -4405,10 +4497,11 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
struct nft_set *set;
struct nft_ctx ctx;
size_t alloc_size;
- u64 timeout;
+ int num_exprs = 0;
char *name;
int err, i;
u16 udlen;
+ u32 flags;
u64 size;
if (nla[NFTA_SET_TABLE] == NULL ||
@@ -4419,10 +4512,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
memset(&desc, 0, sizeof(desc));
- ktype = NFT_DATA_VALUE;
+ desc.ktype = NFT_DATA_VALUE;
if (nla[NFTA_SET_KEY_TYPE] != NULL) {
- ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE]));
- if ((ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK)
+ desc.ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE]));
+ if ((desc.ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK)
return -EINVAL;
}
@@ -4447,17 +4540,17 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
return -EOPNOTSUPP;
}
- dtype = 0;
+ desc.dtype = 0;
if (nla[NFTA_SET_DATA_TYPE] != NULL) {
if (!(flags & NFT_SET_MAP))
return -EINVAL;
- dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE]));
- if ((dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK &&
- dtype != NFT_DATA_VERDICT)
+ desc.dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE]));
+ if ((desc.dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK &&
+ desc.dtype != NFT_DATA_VERDICT)
return -EINVAL;
- if (dtype != NFT_DATA_VERDICT) {
+ if (desc.dtype != NFT_DATA_VERDICT) {
if (nla[NFTA_SET_DATA_LEN] == NULL)
return -EINVAL;
desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
@@ -4472,34 +4565,34 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (!(flags & NFT_SET_OBJECT))
return -EINVAL;
- objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE]));
- if (objtype == NFT_OBJECT_UNSPEC ||
- objtype > NFT_OBJECT_MAX)
+ desc.objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE]));
+ if (desc.objtype == NFT_OBJECT_UNSPEC ||
+ desc.objtype > NFT_OBJECT_MAX)
return -EOPNOTSUPP;
} else if (flags & NFT_SET_OBJECT)
return -EINVAL;
else
- objtype = NFT_OBJECT_UNSPEC;
+ desc.objtype = NFT_OBJECT_UNSPEC;
- timeout = 0;
+ desc.timeout = 0;
if (nla[NFTA_SET_TIMEOUT] != NULL) {
if (!(flags & NFT_SET_TIMEOUT))
return -EINVAL;
- err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &timeout);
+ err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &desc.timeout);
if (err)
return err;
}
- gc_int = 0;
+ desc.gc_int = 0;
if (nla[NFTA_SET_GC_INTERVAL] != NULL) {
if (!(flags & NFT_SET_TIMEOUT))
return -EINVAL;
- gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
+ desc.gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
}
- policy = NFT_SET_POL_PERFORMANCE;
+ desc.policy = NFT_SET_POL_PERFORMANCE;
if (nla[NFTA_SET_POLICY] != NULL)
- policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
+ desc.policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
if (nla[NFTA_SET_DESC] != NULL) {
err = nf_tables_set_desc_parse(&desc, nla[NFTA_SET_DESC]);
@@ -4531,6 +4624,8 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
return PTR_ERR(set);
}
} else {
+ struct nft_expr *exprs[NFT_SET_EXPR_MAX] = {};
+
if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
return -EEXIST;
@@ -4538,13 +4633,29 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
- return 0;
+ err = nft_set_expr_alloc(&ctx, set, nla, exprs, &num_exprs, flags);
+ if (err < 0)
+ return err;
+
+ err = 0;
+ if (!nft_set_is_same(set, &desc, exprs, num_exprs, flags)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
+ err = -EEXIST;
+ }
+
+ for (i = 0; i < num_exprs; i++)
+ nft_expr_destroy(&ctx, exprs[i]);
+
+ if (err < 0)
+ return err;
+
+ return __nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set, &desc);
}
if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
return -ENOENT;
- ops = nft_select_set_ops(&ctx, nla, &desc, policy);
+ ops = nft_select_set_ops(&ctx, nla, &desc);
if (IS_ERR(ops))
return PTR_ERR(ops);
@@ -4584,18 +4695,18 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
set->table = table;
write_pnet(&set->net, net);
set->ops = ops;
- set->ktype = ktype;
+ set->ktype = desc.ktype;
set->klen = desc.klen;
- set->dtype = dtype;
- set->objtype = objtype;
+ set->dtype = desc.dtype;
+ set->objtype = desc.objtype;
set->dlen = desc.dlen;
set->flags = flags;
set->size = desc.size;
- set->policy = policy;
+ set->policy = desc.policy;
set->udlen = udlen;
set->udata = udata;
- set->timeout = timeout;
- set->gc_int = gc_int;
+ set->timeout = desc.timeout;
+ set->gc_int = desc.gc_int;
set->field_count = desc.field_count;
for (i = 0; i < desc.field_count; i++)
@@ -4605,43 +4716,11 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (err < 0)
goto err_set_init;
- if (nla[NFTA_SET_EXPR]) {
- expr = nft_set_elem_expr_alloc(&ctx, set, nla[NFTA_SET_EXPR]);
- if (IS_ERR(expr)) {
- err = PTR_ERR(expr);
- goto err_set_expr_alloc;
- }
- set->exprs[0] = expr;
- set->num_exprs++;
- } else if (nla[NFTA_SET_EXPRESSIONS]) {
- struct nft_expr *expr;
- struct nlattr *tmp;
- int left;
-
- if (!(flags & NFT_SET_EXPR)) {
- err = -EINVAL;
- goto err_set_expr_alloc;
- }
- i = 0;
- nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) {
- if (i == NFT_SET_EXPR_MAX) {
- err = -E2BIG;
- goto err_set_expr_alloc;
- }
- if (nla_type(tmp) != NFTA_LIST_ELEM) {
- err = -EINVAL;
- goto err_set_expr_alloc;
- }
- expr = nft_set_elem_expr_alloc(&ctx, set, tmp);
- if (IS_ERR(expr)) {
- err = PTR_ERR(expr);
- goto err_set_expr_alloc;
- }
- set->exprs[i++] = expr;
- set->num_exprs++;
- }
- }
+ err = nft_set_expr_alloc(&ctx, set, nla, set->exprs, &num_exprs, flags);
+ if (err < 0)
+ goto err_set_destroy;
+ set->num_exprs = num_exprs;
set->handle = nf_tables_alloc_handle(table);
err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
@@ -4655,7 +4734,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
err_set_expr_alloc:
for (i = 0; i < set->num_exprs; i++)
nft_expr_destroy(&ctx, set->exprs[i]);
-
+err_set_destroy:
ops->destroy(set);
err_set_init:
kfree(set->name);
@@ -6008,7 +6087,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
return err;
} else if (set->flags & NFT_SET_TIMEOUT &&
!(flags & NFT_SET_ELEM_INTERVAL_END)) {
- timeout = set->timeout;
+ timeout = READ_ONCE(set->timeout);
}
expiration = 0;
@@ -6109,7 +6188,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (err < 0)
goto err_parse_key_end;
- if (timeout != set->timeout) {
+ if (timeout != READ_ONCE(set->timeout)) {
err = nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
if (err < 0)
goto err_parse_key_end;
@@ -9031,14 +9110,20 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_flow_rule_destroy(nft_trans_flow_rule(trans));
break;
case NFT_MSG_NEWSET:
- nft_clear(net, nft_trans_set(trans));
- /* This avoids hitting -EBUSY when deleting the table
- * from the transaction.
- */
- if (nft_set_is_anonymous(nft_trans_set(trans)) &&
- !list_empty(&nft_trans_set(trans)->bindings))
- trans->ctx.table->use--;
+ if (nft_trans_set_update(trans)) {
+ struct nft_set *set = nft_trans_set(trans);
+ WRITE_ONCE(set->timeout, nft_trans_set_timeout(trans));
+ WRITE_ONCE(set->gc_int, nft_trans_set_gc_int(trans));
+ } else {
+ nft_clear(net, nft_trans_set(trans));
+ /* This avoids hitting -EBUSY when deleting the table
+ * from the transaction.
+ */
+ if (nft_set_is_anonymous(nft_trans_set(trans)) &&
+ !list_empty(&nft_trans_set(trans)->bindings))
+ trans->ctx.table->use--;
+ }
nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
NFT_MSG_NEWSET, GFP_KERNEL);
nft_trans_destroy(trans);
@@ -9260,6 +9345,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSET:
+ if (nft_trans_set_update(trans)) {
+ nft_trans_destroy(trans);
+ break;
+ }
trans->ctx.table->use--;
if (nft_trans_set_bound(trans)) {
nft_trans_destroy(trans);
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 17b418a5a593..3a3c7746e88f 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -63,7 +63,7 @@ nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len)
return false;
if (offset + len > VLAN_ETH_HLEN + vlan_hlen)
- ethlen -= offset + len - VLAN_ETH_HLEN + vlan_hlen;
+ ethlen -= offset + len - VLAN_ETH_HLEN - vlan_hlen;
memcpy(dst_u8, vlanh + offset - vlan_hlen, ethlen);
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 7325bee7d144..19ea4d3c3553 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -38,10 +38,12 @@ static bool nft_rbtree_interval_start(const struct nft_rbtree_elem *rbe)
return !nft_rbtree_interval_end(rbe);
}
-static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
- const struct nft_rbtree_elem *interval)
+static int nft_rbtree_cmp(const struct nft_set *set,
+ const struct nft_rbtree_elem *e1,
+ const struct nft_rbtree_elem *e2)
{
- return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0;
+ return memcmp(nft_set_ext_key(&e1->ext), nft_set_ext_key(&e2->ext),
+ set->klen);
}
static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
@@ -52,7 +54,6 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
const struct nft_rbtree_elem *rbe, *interval = NULL;
u8 genmask = nft_genmask_cur(net);
const struct rb_node *parent;
- const void *this;
int d;
parent = rcu_dereference_raw(priv->root.rb_node);
@@ -62,12 +63,11 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
- this = nft_set_ext_key(&rbe->ext);
- d = memcmp(this, key, set->klen);
+ d = memcmp(nft_set_ext_key(&rbe->ext), key, set->klen);
if (d < 0) {
parent = rcu_dereference_raw(parent->rb_left);
if (interval &&
- nft_rbtree_equal(set, this, interval) &&
+ !nft_rbtree_cmp(set, rbe, interval) &&
nft_rbtree_interval_end(rbe) &&
nft_rbtree_interval_start(interval))
continue;
@@ -215,154 +215,216 @@ static void *nft_rbtree_get(const struct net *net, const struct nft_set *set,
return rbe;
}
+static int nft_rbtree_gc_elem(const struct nft_set *__set,
+ struct nft_rbtree *priv,
+ struct nft_rbtree_elem *rbe)
+{
+ struct nft_set *set = (struct nft_set *)__set;
+ struct rb_node *prev = rb_prev(&rbe->node);
+ struct nft_rbtree_elem *rbe_prev;
+ struct nft_set_gc_batch *gcb;
+
+ gcb = nft_set_gc_batch_check(set, NULL, GFP_ATOMIC);
+ if (!gcb)
+ return -ENOMEM;
+
+ /* search for expired end interval coming before this element. */
+ do {
+ rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
+ if (nft_rbtree_interval_end(rbe_prev))
+ break;
+
+ prev = rb_prev(prev);
+ } while (prev != NULL);
+
+ rb_erase(&rbe_prev->node, &priv->root);
+ rb_erase(&rbe->node, &priv->root);
+ atomic_sub(2, &set->nelems);
+
+ nft_set_gc_batch_add(gcb, rbe);
+ nft_set_gc_batch_complete(gcb);
+
+ return 0;
+}
+
+static bool nft_rbtree_update_first(const struct nft_set *set,
+ struct nft_rbtree_elem *rbe,
+ struct rb_node *first)
+{
+ struct nft_rbtree_elem *first_elem;
+
+ first_elem = rb_entry(first, struct nft_rbtree_elem, node);
+ /* this element is closest to where the new element is to be inserted:
+ * update the first element for the node list path.
+ */
+ if (nft_rbtree_cmp(set, rbe, first_elem) < 0)
+ return true;
+
+ return false;
+}
+
static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree_elem *new,
struct nft_set_ext **ext)
{
- bool overlap = false, dup_end_left = false, dup_end_right = false;
+ struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL;
+ struct rb_node *node, *parent, **p, *first = NULL;
struct nft_rbtree *priv = nft_set_priv(set);
u8 genmask = nft_genmask_next(net);
- struct nft_rbtree_elem *rbe;
- struct rb_node *parent, **p;
- int d;
+ int d, err;
- /* Detect overlaps as we descend the tree. Set the flag in these cases:
- *
- * a1. _ _ __>| ?_ _ __| (insert end before existing end)
- * a2. _ _ ___| ?_ _ _>| (insert end after existing end)
- * a3. _ _ ___? >|_ _ __| (insert start before existing end)
- *
- * and clear it later on, as we eventually reach the points indicated by
- * '?' above, in the cases described below. We'll always meet these
- * later, locally, due to tree ordering, and overlaps for the intervals
- * that are the closest together are always evaluated last.
- *
- * b1. _ _ __>| !_ _ __| (insert end before existing start)
- * b2. _ _ ___| !_ _ _>| (insert end after existing start)
- * b3. _ _ ___! >|_ _ __| (insert start after existing end, as a leaf)
- * '--' no nodes falling in this range
- * b4. >|_ _ ! (insert start before existing start)
- *
- * Case a3. resolves to b3.:
- * - if the inserted start element is the leftmost, because the '0'
- * element in the tree serves as end element
- * - otherwise, if an existing end is found immediately to the left. If
- * there are existing nodes in between, we need to further descend the
- * tree before we can conclude the new start isn't causing an overlap
- *
- * or to b4., which, preceded by a3., means we already traversed one or
- * more existing intervals entirely, from the right.
- *
- * For a new, rightmost pair of elements, we'll hit cases b3. and b2.,
- * in that order.
- *
- * The flag is also cleared in two special cases:
- *
- * b5. |__ _ _!|<_ _ _ (insert start right before existing end)
- * b6. |__ _ >|!__ _ _ (insert end right after existing start)
- *
- * which always happen as last step and imply that no further
- * overlapping is possible.
- *
- * Another special case comes from the fact that start elements matching
- * an already existing start element are allowed: insertion is not
- * performed but we return -EEXIST in that case, and the error will be
- * cleared by the caller if NLM_F_EXCL is not present in the request.
- * This way, request for insertion of an exact overlap isn't reported as
- * error to userspace if not desired.
- *
- * However, if the existing start matches a pre-existing start, but the
- * end element doesn't match the corresponding pre-existing end element,
- * we need to report a partial overlap. This is a local condition that
- * can be noticed without need for a tracking flag, by checking for a
- * local duplicated end for a corresponding start, from left and right,
- * separately.
+ /* Descend the tree to search for an existing element greater than the
+ * key value to insert that is greater than the new element. This is the
+ * first element to walk the ordered elements to find possible overlap.
*/
-
parent = NULL;
p = &priv->root.rb_node;
while (*p != NULL) {
parent = *p;
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
- d = memcmp(nft_set_ext_key(&rbe->ext),
- nft_set_ext_key(&new->ext),
- set->klen);
+ d = nft_rbtree_cmp(set, rbe, new);
+
if (d < 0) {
p = &parent->rb_left;
-
- if (nft_rbtree_interval_start(new)) {
- if (nft_rbtree_interval_end(rbe) &&
- nft_set_elem_active(&rbe->ext, genmask) &&
- !nft_set_elem_expired(&rbe->ext) && !*p)
- overlap = false;
- } else {
- if (dup_end_left && !*p)
- return -ENOTEMPTY;
-
- overlap = nft_rbtree_interval_end(rbe) &&
- nft_set_elem_active(&rbe->ext,
- genmask) &&
- !nft_set_elem_expired(&rbe->ext);
-
- if (overlap) {
- dup_end_right = true;
- continue;
- }
- }
} else if (d > 0) {
- p = &parent->rb_right;
+ if (!first ||
+ nft_rbtree_update_first(set, rbe, first))
+ first = &rbe->node;
- if (nft_rbtree_interval_end(new)) {
- if (dup_end_right && !*p)
- return -ENOTEMPTY;
-
- overlap = nft_rbtree_interval_end(rbe) &&
- nft_set_elem_active(&rbe->ext,
- genmask) &&
- !nft_set_elem_expired(&rbe->ext);
-
- if (overlap) {
- dup_end_left = true;
- continue;
- }
- } else if (nft_set_elem_active(&rbe->ext, genmask) &&
- !nft_set_elem_expired(&rbe->ext)) {
- overlap = nft_rbtree_interval_end(rbe);
- }
+ p = &parent->rb_right;
} else {
- if (nft_rbtree_interval_end(rbe) &&
- nft_rbtree_interval_start(new)) {
+ if (nft_rbtree_interval_end(rbe))
p = &parent->rb_left;
-
- if (nft_set_elem_active(&rbe->ext, genmask) &&
- !nft_set_elem_expired(&rbe->ext))
- overlap = false;
- } else if (nft_rbtree_interval_start(rbe) &&
- nft_rbtree_interval_end(new)) {
+ else
p = &parent->rb_right;
+ }
+ }
+
+ if (!first)
+ first = rb_first(&priv->root);
+
+ /* Detect overlap by going through the list of valid tree nodes.
+ * Values stored in the tree are in reversed order, starting from
+ * highest to lowest value.
+ */
+ for (node = first; node != NULL; node = rb_next(node)) {
+ rbe = rb_entry(node, struct nft_rbtree_elem, node);
- if (nft_set_elem_active(&rbe->ext, genmask) &&
- !nft_set_elem_expired(&rbe->ext))
- overlap = false;
- } else if (nft_set_elem_active(&rbe->ext, genmask) &&
- !nft_set_elem_expired(&rbe->ext)) {
- *ext = &rbe->ext;
- return -EEXIST;
- } else {
- overlap = false;
- if (nft_rbtree_interval_end(rbe))
- p = &parent->rb_left;
- else
- p = &parent->rb_right;
+ if (!nft_set_elem_active(&rbe->ext, genmask))
+ continue;
+
+ /* perform garbage collection to avoid bogus overlap reports. */
+ if (nft_set_elem_expired(&rbe->ext)) {
+ err = nft_rbtree_gc_elem(set, priv, rbe);
+ if (err < 0)
+ return err;
+
+ continue;
+ }
+
+ d = nft_rbtree_cmp(set, rbe, new);
+ if (d == 0) {
+ /* Matching end element: no need to look for an
+ * overlapping greater or equal element.
+ */
+ if (nft_rbtree_interval_end(rbe)) {
+ rbe_le = rbe;
+ break;
+ }
+
+ /* first element that is greater or equal to key value. */
+ if (!rbe_ge) {
+ rbe_ge = rbe;
+ continue;
+ }
+
+ /* this is a closer more or equal element, update it. */
+ if (nft_rbtree_cmp(set, rbe_ge, new) != 0) {
+ rbe_ge = rbe;
+ continue;
+ }
+
+ /* element is equal to key value, make sure flags are
+ * the same, an existing more or equal start element
+ * must not be replaced by more or equal end element.
+ */
+ if ((nft_rbtree_interval_start(new) &&
+ nft_rbtree_interval_start(rbe_ge)) ||
+ (nft_rbtree_interval_end(new) &&
+ nft_rbtree_interval_end(rbe_ge))) {
+ rbe_ge = rbe;
+ continue;
}
+ } else if (d > 0) {
+ /* annotate element greater than the new element. */
+ rbe_ge = rbe;
+ continue;
+ } else if (d < 0) {
+ /* annotate element less than the new element. */
+ rbe_le = rbe;
+ break;
}
+ }
- dup_end_left = dup_end_right = false;
+ /* - new start element matching existing start element: full overlap
+ * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given.
+ */
+ if (rbe_ge && !nft_rbtree_cmp(set, new, rbe_ge) &&
+ nft_rbtree_interval_start(rbe_ge) == nft_rbtree_interval_start(new)) {
+ *ext = &rbe_ge->ext;
+ return -EEXIST;
}
- if (overlap)
+ /* - new end element matching existing end element: full overlap
+ * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given.
+ */
+ if (rbe_le && !nft_rbtree_cmp(set, new, rbe_le) &&
+ nft_rbtree_interval_end(rbe_le) == nft_rbtree_interval_end(new)) {
+ *ext = &rbe_le->ext;
+ return -EEXIST;
+ }
+
+ /* - new start element with existing closest, less or equal key value
+ * being a start element: partial overlap, reported as -ENOTEMPTY.
+ * Anonymous sets allow for two consecutive start element since they
+ * are constant, skip them to avoid bogus overlap reports.
+ */
+ if (!nft_set_is_anonymous(set) && rbe_le &&
+ nft_rbtree_interval_start(rbe_le) && nft_rbtree_interval_start(new))
+ return -ENOTEMPTY;
+
+ /* - new end element with existing closest, less or equal key value
+ * being a end element: partial overlap, reported as -ENOTEMPTY.
+ */
+ if (rbe_le &&
+ nft_rbtree_interval_end(rbe_le) && nft_rbtree_interval_end(new))
return -ENOTEMPTY;
+ /* - new end element with existing closest, greater or equal key value
+ * being an end element: partial overlap, reported as -ENOTEMPTY
+ */
+ if (rbe_ge &&
+ nft_rbtree_interval_end(rbe_ge) && nft_rbtree_interval_end(new))
+ return -ENOTEMPTY;
+
+ /* Accepted element: pick insertion point depending on key value */
+ parent = NULL;
+ p = &priv->root.rb_node;
+ while (*p != NULL) {
+ parent = *p;
+ rbe = rb_entry(parent, struct nft_rbtree_elem, node);
+ d = nft_rbtree_cmp(set, rbe, new);
+
+ if (d < 0)
+ p = &parent->rb_left;
+ else if (d > 0)
+ p = &parent->rb_right;
+ else if (nft_rbtree_interval_end(rbe))
+ p = &parent->rb_left;
+ else
+ p = &parent->rb_right;
+ }
+
rb_link_node_rcu(&new->node, parent, p);
rb_insert_color(&new->node, &priv->root);
return 0;
@@ -501,23 +563,37 @@ static void nft_rbtree_gc(struct work_struct *work)
struct nft_rbtree *priv;
struct rb_node *node;
struct nft_set *set;
+ struct net *net;
+ u8 genmask;
priv = container_of(work, struct nft_rbtree, gc_work.work);
set = nft_set_container_of(priv);
+ net = read_pnet(&set->net);
+ genmask = nft_genmask_cur(net);
write_lock_bh(&priv->lock);
write_seqcount_begin(&priv->count);
for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
rbe = rb_entry(node, struct nft_rbtree_elem, node);
+ if (!nft_set_elem_active(&rbe->ext, genmask))
+ continue;
+
+ /* elements are reversed in the rbtree for historical reasons,
+ * from highest to lowest value, that is why end element is
+ * always visited before the start element.
+ */
if (nft_rbtree_interval_end(rbe)) {
rbe_end = rbe;
continue;
}
if (!nft_set_elem_expired(&rbe->ext))
continue;
- if (nft_set_elem_mark_busy(&rbe->ext))
+
+ if (nft_set_elem_mark_busy(&rbe->ext)) {
+ rbe_end = NULL;
continue;
+ }
if (rbe_prev) {
rb_erase(&rbe_prev->node, &priv->root);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index bca2a470ccad..c64277659753 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -580,7 +580,9 @@ static int netlink_insert(struct sock *sk, u32 portid)
if (nlk_sk(sk)->bound)
goto err;
- nlk_sk(sk)->portid = portid;
+ /* portid can be read locklessly from netlink_getname(). */
+ WRITE_ONCE(nlk_sk(sk)->portid, portid);
+
sock_hold(sk);
err = __netlink_insert(table, sk);
@@ -1096,9 +1098,11 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
return -EINVAL;
if (addr->sa_family == AF_UNSPEC) {
- sk->sk_state = NETLINK_UNCONNECTED;
- nlk->dst_portid = 0;
- nlk->dst_group = 0;
+ /* paired with READ_ONCE() in netlink_getsockbyportid() */
+ WRITE_ONCE(sk->sk_state, NETLINK_UNCONNECTED);
+ /* dst_portid and dst_group can be read locklessly */
+ WRITE_ONCE(nlk->dst_portid, 0);
+ WRITE_ONCE(nlk->dst_group, 0);
return 0;
}
if (addr->sa_family != AF_NETLINK)
@@ -1119,9 +1123,11 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
err = netlink_autobind(sock);
if (err == 0) {
- sk->sk_state = NETLINK_CONNECTED;
- nlk->dst_portid = nladdr->nl_pid;
- nlk->dst_group = ffs(nladdr->nl_groups);
+ /* paired with READ_ONCE() in netlink_getsockbyportid() */
+ WRITE_ONCE(sk->sk_state, NETLINK_CONNECTED);
+ /* dst_portid and dst_group can be read locklessly */
+ WRITE_ONCE(nlk->dst_portid, nladdr->nl_pid);
+ WRITE_ONCE(nlk->dst_group, ffs(nladdr->nl_groups));
}
return err;
@@ -1138,10 +1144,12 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,
nladdr->nl_pad = 0;
if (peer) {
- nladdr->nl_pid = nlk->dst_portid;
- nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
+ /* Paired with WRITE_ONCE() in netlink_connect() */
+ nladdr->nl_pid = READ_ONCE(nlk->dst_portid);
+ nladdr->nl_groups = netlink_group_mask(READ_ONCE(nlk->dst_group));
} else {
- nladdr->nl_pid = nlk->portid;
+ /* Paired with WRITE_ONCE() in netlink_insert() */
+ nladdr->nl_pid = READ_ONCE(nlk->portid);
netlink_lock_table();
nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
netlink_unlock_table();
@@ -1168,8 +1176,9 @@ static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
/* Don't bother queuing skb if kernel socket has no input function */
nlk = nlk_sk(sock);
- if (sock->sk_state == NETLINK_CONNECTED &&
- nlk->dst_portid != nlk_sk(ssk)->portid) {
+ /* dst_portid and sk_state can be changed in netlink_connect() */
+ if (READ_ONCE(sock->sk_state) == NETLINK_CONNECTED &&
+ READ_ONCE(nlk->dst_portid) != nlk_sk(ssk)->portid) {
sock_put(sock);
return ERR_PTR(-ECONNREFUSED);
}
@@ -1886,8 +1895,9 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
goto out;
netlink_skb_flags |= NETLINK_SKB_DST;
} else {
- dst_portid = nlk->dst_portid;
- dst_group = nlk->dst_group;
+ /* Paired with WRITE_ONCE() in netlink_connect() */
+ dst_portid = READ_ONCE(nlk->dst_portid);
+ dst_group = READ_ONCE(nlk->dst_group);
}
/* Paired with WRITE_ONCE() in netlink_insert() */
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 6f7f4392cffb..5a4cb796150f 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -400,6 +400,11 @@ static int nr_listen(struct socket *sock, int backlog)
struct sock *sk = sock->sk;
lock_sock(sk);
+ if (sock->state != SS_UNCONNECTED) {
+ release_sock(sk);
+ return -EINVAL;
+ }
+
if (sk->sk_state != TCP_LISTEN) {
memset(&nr_sk(sk)->user_addr, 0, AX25_ADDR_LEN);
sk->sk_max_ack_backlog = backlog;
diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c
index a8da88db7893..4e7c968cde2d 100644
--- a/net/netrom/nr_timer.c
+++ b/net/netrom/nr_timer.c
@@ -121,6 +121,7 @@ static void nr_heartbeat_expiry(struct timer_list *t)
is accepted() it isn't 'dead' so doesn't get removed. */
if (sock_flag(sk, SOCK_DESTROY) ||
(sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) {
+ sock_hold(sk);
bh_unlock_sock(sk);
nr_destroy_socket(sk);
goto out;
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index 3364caabef8b..a27e1842b2a0 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -157,6 +157,7 @@ static void local_cleanup(struct nfc_llcp_local *local)
cancel_work_sync(&local->rx_work);
cancel_work_sync(&local->timeout_work);
kfree_skb(local->rx_pending);
+ local->rx_pending = NULL;
del_timer_sync(&local->sdreq_timer);
cancel_work_sync(&local->sdreq_timeout_work);
nfc_llcp_free_sdp_tlv_list(&local->pending_sdreqs);
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 9d91087b9399..1fc339084d89 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -1497,6 +1497,7 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info)
u32 dev_idx, se_idx;
u8 *apdu;
size_t apdu_len;
+ int rc;
if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
!info->attrs[NFC_ATTR_SE_INDEX] ||
@@ -1510,25 +1511,37 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info)
if (!dev)
return -ENODEV;
- if (!dev->ops || !dev->ops->se_io)
- return -ENOTSUPP;
+ if (!dev->ops || !dev->ops->se_io) {
+ rc = -EOPNOTSUPP;
+ goto put_dev;
+ }
apdu_len = nla_len(info->attrs[NFC_ATTR_SE_APDU]);
- if (apdu_len == 0)
- return -EINVAL;
+ if (apdu_len == 0) {
+ rc = -EINVAL;
+ goto put_dev;
+ }
apdu = nla_data(info->attrs[NFC_ATTR_SE_APDU]);
- if (!apdu)
- return -EINVAL;
+ if (!apdu) {
+ rc = -EINVAL;
+ goto put_dev;
+ }
ctx = kzalloc(sizeof(struct se_io_ctx), GFP_KERNEL);
- if (!ctx)
- return -ENOMEM;
+ if (!ctx) {
+ rc = -ENOMEM;
+ goto put_dev;
+ }
ctx->dev_idx = dev_idx;
ctx->se_idx = se_idx;
- return nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx);
+ rc = nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx);
+
+put_dev:
+ nfc_put_device(dev);
+ return rc;
}
static int nfc_genl_vendor_cmd(struct sk_buff *skb,
@@ -1551,14 +1564,21 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb,
subcmd = nla_get_u32(info->attrs[NFC_ATTR_VENDOR_SUBCMD]);
dev = nfc_get_device(dev_idx);
- if (!dev || !dev->vendor_cmds || !dev->n_vendor_cmds)
+ if (!dev)
return -ENODEV;
+ if (!dev->vendor_cmds || !dev->n_vendor_cmds) {
+ err = -ENODEV;
+ goto put_dev;
+ }
+
if (info->attrs[NFC_ATTR_VENDOR_DATA]) {
data = nla_data(info->attrs[NFC_ATTR_VENDOR_DATA]);
data_len = nla_len(info->attrs[NFC_ATTR_VENDOR_DATA]);
- if (data_len == 0)
- return -EINVAL;
+ if (data_len == 0) {
+ err = -EINVAL;
+ goto put_dev;
+ }
} else {
data = NULL;
data_len = 0;
@@ -1573,10 +1593,14 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb,
dev->cur_cmd_info = info;
err = cmd->doit(dev, data, data_len);
dev->cur_cmd_info = NULL;
- return err;
+ goto put_dev;
}
- return -EOPNOTSUPP;
+ err = -EOPNOTSUPP;
+
+put_dev:
+ nfc_put_device(dev);
+ return err;
}
/* message building helper */
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 9ca721c9fa71..fcee6012293b 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1004,14 +1004,14 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
key = kzalloc(sizeof(*key), GFP_KERNEL);
if (!key) {
error = -ENOMEM;
- goto err_kfree_key;
+ goto err_kfree_flow;
}
ovs_match_init(&match, key, false, &mask);
error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
a[OVS_FLOW_ATTR_MASK], log);
if (error)
- goto err_kfree_flow;
+ goto err_kfree_key;
ovs_flow_mask_key(&new_flow->key, key, true, &mask);
@@ -1019,14 +1019,14 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
key, log);
if (error)
- goto err_kfree_flow;
+ goto err_kfree_key;
/* Validate actions. */
error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
&new_flow->key, &acts, log);
if (error) {
OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
- goto err_kfree_flow;
+ goto err_kfree_key;
}
reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
@@ -1126,10 +1126,10 @@ err_unlock_ovs:
kfree_skb(reply);
err_kfree_acts:
ovs_nla_free_flow_actions(acts);
-err_kfree_flow:
- ovs_flow_free(new_flow, false);
err_kfree_key:
kfree(key);
+err_kfree_flow:
+ ovs_flow_free(new_flow, false);
error:
return error;
}
@@ -1861,7 +1861,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
if (!vport->upcall_stats) {
err = -ENOMEM;
- goto err_destroy_portids;
+ goto err_destroy_vport;
}
err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
@@ -1876,6 +1876,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_notify(&dp_datapath_genl_family, reply, info);
return 0;
+err_destroy_vport:
+ ovs_dp_detach_port(vport);
err_destroy_portids:
kfree(rcu_dereference_raw(dp->upcall_portids));
err_unlock_and_destroy_meters:
@@ -2323,7 +2325,7 @@ restart:
vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
if (!vport->upcall_stats) {
err = -ENOMEM;
- goto exit_unlock_free;
+ goto exit_unlock_free_vport;
}
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
@@ -2343,6 +2345,8 @@ restart:
ovs_notify(&dp_vport_genl_family, reply, info);
return 0;
+exit_unlock_free_vport:
+ ovs_dp_detach_port(vport);
exit_unlock_free:
ovs_unlock();
kfree_skb(reply);
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 6e38f68f88c2..f2698d2316df 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -449,7 +449,7 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
err = attach_meter(meter_tbl, meter);
if (err)
- goto exit_unlock;
+ goto exit_free_old_meter;
ovs_unlock();
@@ -472,6 +472,8 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
genlmsg_end(reply, ovs_reply_header);
return genlmsg_reply(reply, info);
+exit_free_old_meter:
+ ovs_meter_free(old_meter);
exit_unlock:
ovs_unlock();
nlmsg_free(reply);
diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c
index 1990d496fcfc..e595079c2caf 100644
--- a/net/qrtr/ns.c
+++ b/net/qrtr/ns.c
@@ -83,7 +83,10 @@ static struct qrtr_node *node_get(unsigned int node_id)
node->id = node_id;
- radix_tree_insert(&nodes, node_id, node);
+ if (radix_tree_insert(&nodes, node_id, node)) {
+ kfree(node);
+ return NULL;
+ }
return node;
}
diff --git a/net/rds/message.c b/net/rds/message.c
index b47e4f0a1639..c19c93561227 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -104,9 +104,9 @@ static void rds_rm_zerocopy_callback(struct rds_sock *rs,
spin_lock_irqsave(&q->lock, flags);
head = &q->zcookie_head;
if (!list_empty(head)) {
- info = list_entry(head, struct rds_msg_zcopy_info,
- rs_zcookie_next);
- if (info && rds_zcookie_add(info, cookie)) {
+ info = list_first_entry(head, struct rds_msg_zcopy_info,
+ rs_zcookie_next);
+ if (rds_zcookie_add(info, cookie)) {
spin_unlock_irqrestore(&q->lock, flags);
kfree(rds_info_from_znotifier(znotif));
/* caller invokes rds_wake_sk_sleep() */
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 36fefc3957d7..ca2b17f32670 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -488,6 +488,12 @@ static int rose_listen(struct socket *sock, int backlog)
{
struct sock *sk = sock->sk;
+ lock_sock(sk);
+ if (sock->state != SS_UNCONNECTED) {
+ release_sock(sk);
+ return -EINVAL;
+ }
+
if (sk->sk_state != TCP_LISTEN) {
struct rose_sock *rose = rose_sk(sk);
@@ -497,8 +503,10 @@ static int rose_listen(struct socket *sock, int backlog)
memset(rose->dest_digis, 0, AX25_ADDR_LEN * ROSE_MAX_DIGIS);
sk->sk_max_ack_backlog = backlog;
sk->sk_state = TCP_LISTEN;
+ release_sock(sk);
return 0;
}
+ release_sock(sk);
return -EOPNOTSUPP;
}
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index e76d3459d78e..ac5caf5a48e1 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -10,6 +10,7 @@ rxrpc-y := \
call_accept.o \
call_event.o \
call_object.o \
+ call_state.o \
conn_client.o \
conn_event.o \
conn_object.o \
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 7ea576f6ba4b..ebbd4a1c3f86 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -155,10 +155,10 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
if (service_id) {
write_lock(&local->services_lock);
- if (rcu_access_pointer(local->service))
+ if (local->service)
goto service_in_use;
rx->local = local;
- rcu_assign_pointer(local->service, rx);
+ local->service = rx;
write_unlock(&local->services_lock);
rx->sk.sk_state = RXRPC_SERVER_BOUND;
@@ -328,7 +328,6 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
mutex_unlock(&call->user_mutex);
}
- rxrpc_put_peer(cp.peer, rxrpc_peer_put_discard_tmp);
_leave(" = %p", call);
return call;
}
@@ -374,13 +373,17 @@ EXPORT_SYMBOL(rxrpc_kernel_end_call);
* @sock: The socket the call is on
* @call: The call to check
*
- * Allow a kernel service to find out whether a call is still alive -
- * ie. whether it has completed.
+ * Allow a kernel service to find out whether a call is still alive - whether
+ * it has completed successfully and all received data has been consumed.
*/
bool rxrpc_kernel_check_life(const struct socket *sock,
const struct rxrpc_call *call)
{
- return call->state != RXRPC_CALL_COMPLETE;
+ if (!rxrpc_call_is_complete(call))
+ return true;
+ if (call->completion != RXRPC_CALL_SUCCEEDED)
+ return false;
+ return !skb_queue_empty(&call->recvmsg_queue);
}
EXPORT_SYMBOL(rxrpc_kernel_check_life);
@@ -872,9 +875,9 @@ static int rxrpc_release_sock(struct sock *sk)
sk->sk_state = RXRPC_CLOSE;
- if (rx->local && rcu_access_pointer(rx->local->service) == rx) {
+ if (rx->local && rx->local->service == rx) {
write_lock(&rx->local->services_lock);
- rcu_assign_pointer(rx->local->service, NULL);
+ rx->local->service = NULL;
write_unlock(&rx->local->services_lock);
}
@@ -957,16 +960,9 @@ static const struct net_proto_family rxrpc_family_ops = {
static int __init af_rxrpc_init(void)
{
int ret = -1;
- unsigned int tmp;
BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > sizeof_field(struct sk_buff, cb));
- get_random_bytes(&tmp, sizeof(tmp));
- tmp &= 0x3fffffff;
- if (tmp == 0)
- tmp = 1;
- idr_set_cursor(&rxrpc_client_conn_ids, tmp);
-
ret = -ENOMEM;
rxrpc_call_jar = kmem_cache_create(
"rxrpc_call_jar", sizeof(struct rxrpc_call), 0,
@@ -1062,7 +1058,6 @@ static void __exit af_rxrpc_exit(void)
* are released.
*/
rcu_barrier();
- rxrpc_destroy_client_conn_ids();
destroy_workqueue(rxrpc_workqueue);
rxrpc_exit_security();
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 18092526d3c8..433060cade03 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -38,6 +38,7 @@ struct rxrpc_txbuf;
enum rxrpc_skb_mark {
RXRPC_SKB_MARK_PACKET, /* Received packet */
RXRPC_SKB_MARK_ERROR, /* Error notification */
+ RXRPC_SKB_MARK_SERVICE_CONN_SECURED, /* Service connection response has been verified */
RXRPC_SKB_MARK_REJECT_BUSY, /* Reject with BUSY */
RXRPC_SKB_MARK_REJECT_ABORT, /* Reject with ABORT (code in skb->priority) */
};
@@ -75,13 +76,7 @@ struct rxrpc_net {
bool live;
- bool kill_all_client_conns;
atomic_t nr_client_conns;
- spinlock_t client_conn_cache_lock; /* Lock for ->*_client_conns */
- struct mutex client_conn_discard_lock; /* Prevent multiple discarders */
- struct list_head idle_client_conns;
- struct work_struct client_conn_reaper;
- struct timer_list client_conn_reap_timer;
struct hlist_head local_endpoints;
struct mutex local_mutex; /* Lock for ->local_endpoints */
@@ -202,6 +197,7 @@ struct rxrpc_host_header {
* - max 48 bytes (struct sk_buff::cb)
*/
struct rxrpc_skb_priv {
+ struct rxrpc_connection *conn; /* Connection referred to (poke packet) */
u16 offset; /* Offset of data */
u16 len; /* Length of data */
u8 flags;
@@ -262,13 +258,11 @@ struct rxrpc_security {
/* respond to a challenge */
int (*respond_to_challenge)(struct rxrpc_connection *,
- struct sk_buff *,
- u32 *);
+ struct sk_buff *);
/* verify a response */
int (*verify_response)(struct rxrpc_connection *,
- struct sk_buff *,
- u32 *);
+ struct sk_buff *);
/* clear connection security */
void (*clear)(struct rxrpc_connection *);
@@ -283,22 +277,34 @@ struct rxrpc_local {
struct rcu_head rcu;
atomic_t active_users; /* Number of users of the local endpoint */
refcount_t ref; /* Number of references to the structure */
- struct rxrpc_net *rxnet; /* The network ns in which this resides */
+ struct net *net; /* The network namespace */
+ struct rxrpc_net *rxnet; /* Our bits in the network namespace */
struct hlist_node link;
struct socket *socket; /* my UDP socket */
struct task_struct *io_thread;
struct completion io_thread_ready; /* Indication that the I/O thread started */
- struct rxrpc_sock __rcu *service; /* Service(s) listening on this endpoint */
+ struct rxrpc_sock *service; /* Service(s) listening on this endpoint */
struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */
struct sk_buff_head rx_queue; /* Received packets */
+ struct list_head conn_attend_q; /* Conns requiring immediate attention */
struct list_head call_attend_q; /* Calls requiring immediate attention */
+
struct rb_root client_bundles; /* Client connection bundles by socket params */
spinlock_t client_bundles_lock; /* Lock for client_bundles */
+ bool kill_all_client_conns;
+ struct list_head idle_client_conns;
+ struct timer_list client_conn_reap_timer;
+ unsigned long client_conn_flags;
+#define RXRPC_CLIENT_CONN_REAP_TIMER 0 /* The client conn reap timer expired */
+
spinlock_t lock; /* access lock */
rwlock_t services_lock; /* lock for services list */
int debug_id; /* debug ID for printks */
bool dead;
bool service_closed; /* Service socket closed */
+ struct idr conn_ids; /* List of connection IDs */
+ struct list_head new_client_calls; /* Newly created client calls need connection */
+ spinlock_t client_call_lock; /* Lock for ->new_client_calls */
struct sockaddr_rxrpc srx; /* local address */
};
@@ -356,7 +362,6 @@ struct rxrpc_conn_proto {
struct rxrpc_conn_parameters {
struct rxrpc_local *local; /* Representation of local endpoint */
- struct rxrpc_peer *peer; /* Remote endpoint */
struct key *key; /* Security details */
bool exclusive; /* T if conn is exclusive */
bool upgrade; /* T if service ID can be upgraded */
@@ -365,10 +370,21 @@ struct rxrpc_conn_parameters {
};
/*
+ * Call completion condition (state == RXRPC_CALL_COMPLETE).
+ */
+enum rxrpc_call_completion {
+ RXRPC_CALL_SUCCEEDED, /* - Normal termination */
+ RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */
+ RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */
+ RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */
+ RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */
+ NR__RXRPC_CALL_COMPLETIONS
+};
+
+/*
* Bits in the connection flags.
*/
enum rxrpc_conn_flag {
- RXRPC_CONN_HAS_IDR, /* Has a client conn ID assigned */
RXRPC_CONN_IN_SERVICE_CONNS, /* Conn is in peer->service_conns */
RXRPC_CONN_DONT_REUSE, /* Don't reuse this connection */
RXRPC_CONN_PROBING_FOR_UPGRADE, /* Probing for service upgrade */
@@ -388,6 +404,7 @@ enum rxrpc_conn_flag {
*/
enum rxrpc_conn_event {
RXRPC_CONN_EV_CHALLENGE, /* Send challenge packet */
+ RXRPC_CONN_EV_ABORT_CALLS, /* Abort attached calls */
};
/*
@@ -395,13 +412,13 @@ enum rxrpc_conn_event {
*/
enum rxrpc_conn_proto_state {
RXRPC_CONN_UNUSED, /* Connection not yet attempted */
+ RXRPC_CONN_CLIENT_UNSECURED, /* Client connection needs security init */
RXRPC_CONN_CLIENT, /* Client connection */
RXRPC_CONN_SERVICE_PREALLOC, /* Service connection preallocation */
RXRPC_CONN_SERVICE_UNSECURED, /* Service unsecured connection */
RXRPC_CONN_SERVICE_CHALLENGING, /* Service challenging for security */
RXRPC_CONN_SERVICE, /* Service secured connection */
- RXRPC_CONN_REMOTELY_ABORTED, /* Conn aborted by peer */
- RXRPC_CONN_LOCALLY_ABORTED, /* Conn aborted locally */
+ RXRPC_CONN_ABORTED, /* Conn aborted */
RXRPC_CONN__NR_STATES
};
@@ -412,17 +429,16 @@ struct rxrpc_bundle {
struct rxrpc_local *local; /* Representation of local endpoint */
struct rxrpc_peer *peer; /* Remote endpoint */
struct key *key; /* Security details */
+ const struct rxrpc_security *security; /* applied security module */
refcount_t ref;
atomic_t active; /* Number of active users */
unsigned int debug_id;
u32 security_level; /* Security level selected */
u16 service_id; /* Service ID for this connection */
bool try_upgrade; /* True if the bundle is attempting upgrade */
- bool alloc_conn; /* True if someone's getting a conn */
bool exclusive; /* T if conn is exclusive */
bool upgrade; /* T if service ID can be upgraded */
- short alloc_error; /* Error from last conn allocation */
- spinlock_t channel_lock;
+ unsigned short alloc_error; /* Error from last conn allocation */
struct rb_node local_node; /* Node in local->client_conns */
struct list_head waiting_calls; /* Calls waiting for channels */
unsigned long avail_chans; /* Mask of available channels */
@@ -440,6 +456,7 @@ struct rxrpc_connection {
struct rxrpc_peer *peer; /* Remote endpoint */
struct rxrpc_net *rxnet; /* Network namespace to which call belongs */
struct key *key; /* Security details */
+ struct list_head attend_link; /* Link in local->conn_attend_q */
refcount_t ref;
atomic_t active; /* Active count for service conns */
@@ -449,7 +466,7 @@ struct rxrpc_connection {
unsigned char act_chans; /* Mask of active channels */
struct rxrpc_channel {
unsigned long final_ack_at; /* Time at which to issue final ACK */
- struct rxrpc_call __rcu *call; /* Active call */
+ struct rxrpc_call *call; /* Active call */
unsigned int call_debug_id; /* call->debug_id */
u32 call_id; /* ID of current call */
u32 call_counter; /* Call ID counter */
@@ -470,6 +487,7 @@ struct rxrpc_connection {
struct list_head link; /* link in master connection list */
struct sk_buff_head rx_queue; /* received conn-level packets */
+ struct mutex security_lock; /* Lock for security management */
const struct rxrpc_security *security; /* applied security module */
union {
struct {
@@ -483,7 +501,8 @@ struct rxrpc_connection {
unsigned long idle_timestamp; /* Time at which last became idle */
spinlock_t state_lock; /* state-change lock */
enum rxrpc_conn_proto_state state; /* current state of connection */
- u32 abort_code; /* Abort code of connection abort */
+ enum rxrpc_call_completion completion; /* Completion condition */
+ s32 abort_code; /* Abort code of connection abort */
int debug_id; /* debug ID for printks */
atomic_t serial; /* packet serial number counter */
unsigned int hi_serial; /* highest serial number received */
@@ -527,7 +546,8 @@ enum rxrpc_call_flag {
RXRPC_CALL_KERNEL, /* The call was made by the kernel */
RXRPC_CALL_UPGRADE, /* Service upgrade was requested for the call */
RXRPC_CALL_EXCLUSIVE, /* The call uses a once-only connection */
- RXRPC_CALL_RX_IS_IDLE, /* Reception is idle - send an ACK */
+ RXRPC_CALL_RX_IS_IDLE, /* recvmsg() is idle - send an ACK */
+ RXRPC_CALL_RECVMSG_READ_ALL, /* recvmsg() read all of the received data */
};
/*
@@ -558,18 +578,6 @@ enum rxrpc_call_state {
};
/*
- * Call completion condition (state == RXRPC_CALL_COMPLETE).
- */
-enum rxrpc_call_completion {
- RXRPC_CALL_SUCCEEDED, /* - Normal termination */
- RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */
- RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */
- RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */
- RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */
- NR__RXRPC_CALL_COMPLETIONS
-};
-
-/*
* Call Tx congestion management modes.
*/
enum rxrpc_congest_mode {
@@ -587,6 +595,7 @@ enum rxrpc_congest_mode {
struct rxrpc_call {
struct rcu_head rcu;
struct rxrpc_connection *conn; /* connection carrying call */
+ struct rxrpc_bundle *bundle; /* Connection bundle to use */
struct rxrpc_peer *peer; /* Peer record for remote address */
struct rxrpc_local *local; /* Representation of local endpoint */
struct rxrpc_sock __rcu *socket; /* socket responsible */
@@ -609,7 +618,7 @@ struct rxrpc_call {
struct work_struct destroyer; /* In-process-context destroyer */
rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */
struct list_head link; /* link in master call list */
- struct list_head chan_wait_link; /* Link in conn->bundle->waiting_calls */
+ struct list_head wait_link; /* Link in local->new_client_calls */
struct hlist_node error_link; /* link in error distribution list */
struct list_head accept_link; /* Link in rx->acceptq */
struct list_head recvmsg_link; /* Link in rx->recvmsg_q */
@@ -623,10 +632,13 @@ struct rxrpc_call {
unsigned long flags;
unsigned long events;
spinlock_t notify_lock; /* Kernel notification lock */
- rwlock_t state_lock; /* lock for state transition */
- u32 abort_code; /* Local/remote abort code */
+ unsigned int send_abort_why; /* Why the abort [enum rxrpc_abort_reason] */
+ s32 send_abort; /* Abort code to be sent */
+ short send_abort_err; /* Error to be associated with the abort */
+ rxrpc_seq_t send_abort_seq; /* DATA packet that incurred the abort (or 0) */
+ s32 abort_code; /* Local/remote abort code */
int error; /* Local error incurred */
- enum rxrpc_call_state state; /* current state of call */
+ enum rxrpc_call_state _state; /* Current state of call (needs barrier) */
enum rxrpc_call_completion completion; /* Call completion condition */
refcount_t ref;
u8 security_ix; /* Security type */
@@ -812,9 +824,11 @@ extern struct workqueue_struct *rxrpc_workqueue;
*/
int rxrpc_service_prealloc(struct rxrpc_sock *, gfp_t);
void rxrpc_discard_prealloc(struct rxrpc_sock *);
-int rxrpc_new_incoming_call(struct rxrpc_local *, struct rxrpc_peer *,
- struct rxrpc_connection *, struct sockaddr_rxrpc *,
- struct sk_buff *);
+bool rxrpc_new_incoming_call(struct rxrpc_local *local,
+ struct rxrpc_peer *peer,
+ struct rxrpc_connection *conn,
+ struct sockaddr_rxrpc *peer_srx,
+ struct sk_buff *skb);
void rxrpc_accept_incoming_calls(struct rxrpc_local *);
int rxrpc_user_charge_accept(struct rxrpc_sock *, unsigned long);
@@ -834,7 +848,7 @@ void rxrpc_reduce_call_timer(struct rxrpc_call *call,
unsigned long now,
enum rxrpc_timer_trace why);
-void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb);
+bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb);
/*
* call_object.c
@@ -851,6 +865,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *,
struct sockaddr_rxrpc *,
struct rxrpc_call_params *, gfp_t,
unsigned int);
+void rxrpc_start_call_timer(struct rxrpc_call *call);
void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *,
struct sk_buff *);
void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *);
@@ -873,32 +888,88 @@ static inline bool rxrpc_is_client_call(const struct rxrpc_call *call)
}
/*
+ * call_state.c
+ */
+bool rxrpc_set_call_completion(struct rxrpc_call *call,
+ enum rxrpc_call_completion compl,
+ u32 abort_code,
+ int error);
+bool rxrpc_call_completed(struct rxrpc_call *call);
+bool rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq,
+ u32 abort_code, int error, enum rxrpc_abort_reason why);
+void rxrpc_prefail_call(struct rxrpc_call *call, enum rxrpc_call_completion compl,
+ int error);
+
+static inline void rxrpc_set_call_state(struct rxrpc_call *call,
+ enum rxrpc_call_state state)
+{
+ /* Order write of completion info before write of ->state. */
+ smp_store_release(&call->_state, state);
+ wake_up(&call->waitq);
+}
+
+static inline enum rxrpc_call_state __rxrpc_call_state(const struct rxrpc_call *call)
+{
+ return call->_state; /* Only inside I/O thread */
+}
+
+static inline bool __rxrpc_call_is_complete(const struct rxrpc_call *call)
+{
+ return __rxrpc_call_state(call) == RXRPC_CALL_COMPLETE;
+}
+
+static inline enum rxrpc_call_state rxrpc_call_state(const struct rxrpc_call *call)
+{
+ /* Order read ->state before read of completion info. */
+ return smp_load_acquire(&call->_state);
+}
+
+static inline bool rxrpc_call_is_complete(const struct rxrpc_call *call)
+{
+ return rxrpc_call_state(call) == RXRPC_CALL_COMPLETE;
+}
+
+static inline bool rxrpc_call_has_failed(const struct rxrpc_call *call)
+{
+ return rxrpc_call_is_complete(call) && call->completion != RXRPC_CALL_SUCCEEDED;
+}
+
+/*
* conn_client.c
*/
extern unsigned int rxrpc_reap_client_connections;
extern unsigned long rxrpc_conn_idle_client_expiry;
extern unsigned long rxrpc_conn_idle_client_fast_expiry;
-extern struct idr rxrpc_client_conn_ids;
-void rxrpc_destroy_client_conn_ids(void);
+void rxrpc_purge_client_connections(struct rxrpc_local *local);
struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *, enum rxrpc_bundle_trace);
void rxrpc_put_bundle(struct rxrpc_bundle *, enum rxrpc_bundle_trace);
-int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_call *,
- struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *,
- gfp_t);
+int rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t gfp);
+void rxrpc_connect_client_calls(struct rxrpc_local *local);
void rxrpc_expose_client_call(struct rxrpc_call *);
void rxrpc_disconnect_client_call(struct rxrpc_bundle *, struct rxrpc_call *);
+void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle);
void rxrpc_put_client_conn(struct rxrpc_connection *, enum rxrpc_conn_trace);
-void rxrpc_discard_expired_client_conns(struct work_struct *);
-void rxrpc_destroy_all_client_connections(struct rxrpc_net *);
+void rxrpc_discard_expired_client_conns(struct rxrpc_local *local);
void rxrpc_clean_up_local_conns(struct rxrpc_local *);
/*
* conn_event.c
*/
+void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, struct sk_buff *skb,
+ unsigned int channel);
+int rxrpc_abort_conn(struct rxrpc_connection *conn, struct sk_buff *skb,
+ s32 abort_code, int err, enum rxrpc_abort_reason why);
void rxrpc_process_connection(struct work_struct *);
void rxrpc_process_delayed_final_acks(struct rxrpc_connection *, bool);
-int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb);
+bool rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb);
+void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb);
+
+static inline bool rxrpc_is_conn_aborted(const struct rxrpc_connection *conn)
+{
+ /* Order reading the abort info after the state check. */
+ return smp_load_acquire(&conn->state) == RXRPC_CONN_ABORTED;
+}
/*
* conn_object.c
@@ -906,6 +977,7 @@ int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb);
extern unsigned int rxrpc_connection_expiry;
extern unsigned int rxrpc_closed_conn_expiry;
+void rxrpc_poke_conn(struct rxrpc_connection *conn, enum rxrpc_conn_trace why);
struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *, gfp_t);
struct rxrpc_connection *rxrpc_find_client_connection_rcu(struct rxrpc_local *,
struct sockaddr_rxrpc *,
@@ -961,12 +1033,19 @@ void rxrpc_implicit_end_call(struct rxrpc_call *, struct sk_buff *);
*/
int rxrpc_encap_rcv(struct sock *, struct sk_buff *);
void rxrpc_error_report(struct sock *);
+bool rxrpc_direct_abort(struct sk_buff *skb, enum rxrpc_abort_reason why,
+ s32 abort_code, int err);
int rxrpc_io_thread(void *data);
static inline void rxrpc_wake_up_io_thread(struct rxrpc_local *local)
{
wake_up_process(local->io_thread);
}
+static inline bool rxrpc_protocol_error(struct sk_buff *skb, enum rxrpc_abort_reason why)
+{
+ return rxrpc_direct_abort(skb, why, RX_PROTOCOL_ERROR, -EPROTO);
+}
+
/*
* insecure.c
*/
@@ -1048,6 +1127,7 @@ static inline struct rxrpc_net *rxrpc_net(struct net *net)
int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb);
int rxrpc_send_abort_packet(struct rxrpc_call *);
int rxrpc_send_data_packet(struct rxrpc_call *, struct rxrpc_txbuf *);
+void rxrpc_send_conn_abort(struct rxrpc_connection *conn);
void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb);
void rxrpc_send_keepalive(struct rxrpc_peer *);
void rxrpc_transmit_one(struct rxrpc_call *call, struct rxrpc_txbuf *txb);
@@ -1063,12 +1143,11 @@ void rxrpc_peer_keepalive_worker(struct work_struct *);
*/
struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *,
const struct sockaddr_rxrpc *);
-struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *, struct rxrpc_local *,
- struct sockaddr_rxrpc *, gfp_t);
+struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
+ struct sockaddr_rxrpc *srx, gfp_t gfp);
struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t,
enum rxrpc_peer_trace);
-void rxrpc_new_incoming_peer(struct rxrpc_sock *, struct rxrpc_local *,
- struct rxrpc_peer *);
+void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer);
void rxrpc_destroy_all_peers(struct rxrpc_net *);
struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *, enum rxrpc_peer_trace);
struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *, enum rxrpc_peer_trace);
@@ -1086,33 +1165,22 @@ extern const struct seq_operations rxrpc_local_seq_ops;
* recvmsg.c
*/
void rxrpc_notify_socket(struct rxrpc_call *);
-bool __rxrpc_set_call_completion(struct rxrpc_call *, enum rxrpc_call_completion, u32, int);
-bool rxrpc_set_call_completion(struct rxrpc_call *, enum rxrpc_call_completion, u32, int);
-bool __rxrpc_call_completed(struct rxrpc_call *);
-bool rxrpc_call_completed(struct rxrpc_call *);
-bool __rxrpc_abort_call(const char *, struct rxrpc_call *, rxrpc_seq_t, u32, int);
-bool rxrpc_abort_call(const char *, struct rxrpc_call *, rxrpc_seq_t, u32, int);
int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int);
/*
* Abort a call due to a protocol error.
*/
-static inline bool __rxrpc_abort_eproto(struct rxrpc_call *call,
- struct sk_buff *skb,
- const char *eproto_why,
- const char *why,
- u32 abort_code)
+static inline int rxrpc_abort_eproto(struct rxrpc_call *call,
+ struct sk_buff *skb,
+ s32 abort_code,
+ enum rxrpc_abort_reason why)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- trace_rxrpc_rx_eproto(call, sp->hdr.serial, eproto_why);
- return rxrpc_abort_call(why, call, sp->hdr.seq, abort_code, -EPROTO);
+ rxrpc_abort_call(call, sp->hdr.seq, abort_code, -EPROTO, why);
+ return -EPROTO;
}
-#define rxrpc_abort_eproto(call, skb, eproto_why, abort_why, abort_code) \
- __rxrpc_abort_eproto((call), (skb), tracepoint_string(eproto_why), \
- (abort_why), (abort_code))
-
/*
* rtt.c
*/
@@ -1144,6 +1212,8 @@ struct key *rxrpc_look_up_server_security(struct rxrpc_connection *,
/*
* sendmsg.c
*/
+bool rxrpc_propose_abort(struct rxrpc_call *call, s32 abort_code, int error,
+ enum rxrpc_abort_reason why);
int rxrpc_do_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t);
/*
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index c02401656fa9..3e8689fdc437 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -99,7 +99,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
if (!call)
return -ENOMEM;
call->flags |= (1 << RXRPC_CALL_IS_SERVICE);
- call->state = RXRPC_CALL_SERVER_PREALLOC;
+ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_PREALLOC);
__set_bit(RXRPC_CALL_EV_INITIAL_PING, &call->events);
trace_rxrpc_call(call->debug_id, refcount_read(&call->ref),
@@ -280,7 +280,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
(peer_tail + 1) &
(RXRPC_BACKLOG_MAX - 1));
- rxrpc_new_incoming_peer(rx, local, peer);
+ rxrpc_new_incoming_peer(local, peer);
}
/* Now allocate and set up the connection */
@@ -326,11 +326,11 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
* If we want to report an error, we mark the skb with the packet type and
* abort code and return false.
*/
-int rxrpc_new_incoming_call(struct rxrpc_local *local,
- struct rxrpc_peer *peer,
- struct rxrpc_connection *conn,
- struct sockaddr_rxrpc *peer_srx,
- struct sk_buff *skb)
+bool rxrpc_new_incoming_call(struct rxrpc_local *local,
+ struct rxrpc_peer *peer,
+ struct rxrpc_connection *conn,
+ struct sockaddr_rxrpc *peer_srx,
+ struct sk_buff *skb)
{
const struct rxrpc_security *sec = NULL;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
@@ -339,18 +339,17 @@ int rxrpc_new_incoming_call(struct rxrpc_local *local,
_enter("");
- /* Don't set up a call for anything other than the first DATA packet. */
- if (sp->hdr.seq != 1 ||
- sp->hdr.type != RXRPC_PACKET_TYPE_DATA)
- return 0; /* Just discard */
+ /* Don't set up a call for anything other than a DATA packet. */
+ if (sp->hdr.type != RXRPC_PACKET_TYPE_DATA)
+ return rxrpc_protocol_error(skb, rxrpc_eproto_no_service_call);
- rcu_read_lock();
+ read_lock(&local->services_lock);
/* Weed out packets to services we're not offering. Packets that would
* begin a call are explicitly rejected and the rest are just
* discarded.
*/
- rx = rcu_dereference(local->service);
+ rx = local->service;
if (!rx || (sp->hdr.serviceId != rx->srx.srx_service &&
sp->hdr.serviceId != rx->second_service)
) {
@@ -363,16 +362,14 @@ int rxrpc_new_incoming_call(struct rxrpc_local *local,
if (!conn) {
sec = rxrpc_get_incoming_security(rx, skb);
if (!sec)
- goto reject;
+ goto unsupported_security;
}
spin_lock(&rx->incoming_lock);
if (rx->sk.sk_state == RXRPC_SERVER_LISTEN_DISABLED ||
rx->sk.sk_state == RXRPC_CLOSE) {
- trace_rxrpc_abort(0, "CLS", sp->hdr.cid, sp->hdr.callNumber,
- sp->hdr.seq, RX_INVALID_OPERATION, ESHUTDOWN);
- skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
- skb->priority = RX_INVALID_OPERATION;
+ rxrpc_direct_abort(skb, rxrpc_abort_shut_down,
+ RX_INVALID_OPERATION, -ESHUTDOWN);
goto no_call;
}
@@ -402,7 +399,7 @@ int rxrpc_new_incoming_call(struct rxrpc_local *local,
spin_unlock(&conn->state_lock);
spin_unlock(&rx->incoming_lock);
- rcu_read_unlock();
+ read_unlock(&local->services_lock);
if (hlist_unhashed(&call->error_link)) {
spin_lock(&call->peer->lock);
@@ -413,22 +410,24 @@ int rxrpc_new_incoming_call(struct rxrpc_local *local,
_leave(" = %p{%d}", call, call->debug_id);
rxrpc_input_call_event(call, skb);
rxrpc_put_call(call, rxrpc_call_put_input);
- return 0;
+ return true;
unsupported_service:
- trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
- RX_INVALID_OPERATION, EOPNOTSUPP);
- skb->priority = RX_INVALID_OPERATION;
- goto reject;
+ read_unlock(&local->services_lock);
+ return rxrpc_direct_abort(skb, rxrpc_abort_service_not_offered,
+ RX_INVALID_OPERATION, -EOPNOTSUPP);
+unsupported_security:
+ read_unlock(&local->services_lock);
+ return rxrpc_direct_abort(skb, rxrpc_abort_service_not_offered,
+ RX_INVALID_OPERATION, -EKEYREJECTED);
no_call:
spin_unlock(&rx->incoming_lock);
-reject:
- rcu_read_unlock();
+ read_unlock(&local->services_lock);
_leave(" = f [%u]", skb->mark);
- return -EPROTO;
+ return false;
discard:
- rcu_read_unlock();
- return 0;
+ read_unlock(&local->services_lock);
+ return true;
}
/*
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index b2cf448fb02c..1abdef15debc 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -251,6 +251,41 @@ out:
_leave("");
}
+/*
+ * Start transmitting the reply to a service. This cancels the need to ACK the
+ * request if we haven't yet done so.
+ */
+static void rxrpc_begin_service_reply(struct rxrpc_call *call)
+{
+ unsigned long now = jiffies;
+
+ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SEND_REPLY);
+ WRITE_ONCE(call->delay_ack_at, now + MAX_JIFFY_OFFSET);
+ if (call->ackr_reason == RXRPC_ACK_DELAY)
+ call->ackr_reason = 0;
+ trace_rxrpc_timer(call, rxrpc_timer_init_for_send_reply, now);
+}
+
+/*
+ * Close the transmission phase. After this point there is no more data to be
+ * transmitted in the call.
+ */
+static void rxrpc_close_tx_phase(struct rxrpc_call *call)
+{
+ _debug("________awaiting reply/ACK__________");
+
+ switch (__rxrpc_call_state(call)) {
+ case RXRPC_CALL_CLIENT_SEND_REQUEST:
+ rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_AWAIT_REPLY);
+ break;
+ case RXRPC_CALL_SERVER_SEND_REPLY:
+ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_AWAIT_ACK);
+ break;
+ default:
+ break;
+ }
+}
+
static bool rxrpc_tx_window_has_space(struct rxrpc_call *call)
{
unsigned int winsize = min_t(unsigned int, call->tx_winsize,
@@ -270,9 +305,11 @@ static void rxrpc_decant_prepared_tx(struct rxrpc_call *call)
{
struct rxrpc_txbuf *txb;
- if (rxrpc_is_client_call(call) &&
- !test_bit(RXRPC_CALL_EXPOSED, &call->flags))
+ if (!test_bit(RXRPC_CALL_EXPOSED, &call->flags)) {
+ if (list_empty(&call->tx_sendmsg))
+ return;
rxrpc_expose_client_call(call);
+ }
while ((txb = list_first_entry_or_null(&call->tx_sendmsg,
struct rxrpc_txbuf, call_link))) {
@@ -283,6 +320,9 @@ static void rxrpc_decant_prepared_tx(struct rxrpc_call *call)
call->tx_top = txb->seq;
list_add_tail(&txb->call_link, &call->tx_buffer);
+ if (txb->wire.flags & RXRPC_LAST_PACKET)
+ rxrpc_close_tx_phase(call);
+
rxrpc_transmit_one(call, txb);
if (!rxrpc_tx_window_has_space(call))
@@ -292,16 +332,15 @@ static void rxrpc_decant_prepared_tx(struct rxrpc_call *call)
static void rxrpc_transmit_some_data(struct rxrpc_call *call)
{
- switch (call->state) {
+ switch (__rxrpc_call_state(call)) {
case RXRPC_CALL_SERVER_ACK_REQUEST:
if (list_empty(&call->tx_sendmsg))
return;
+ rxrpc_begin_service_reply(call);
fallthrough;
case RXRPC_CALL_SERVER_SEND_REPLY:
- case RXRPC_CALL_SERVER_AWAIT_ACK:
case RXRPC_CALL_CLIENT_SEND_REQUEST:
- case RXRPC_CALL_CLIENT_AWAIT_REPLY:
if (!rxrpc_tx_window_has_space(call))
return;
if (list_empty(&call->tx_sendmsg)) {
@@ -331,21 +370,31 @@ static void rxrpc_send_initial_ping(struct rxrpc_call *call)
/*
* Handle retransmission and deferred ACK/abort generation.
*/
-void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
+bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
{
unsigned long now, next, t;
rxrpc_serial_t ackr_serial;
bool resend = false, expired = false;
+ s32 abort_code;
rxrpc_see_call(call, rxrpc_call_see_input);
//printk("\n--------------------\n");
_enter("{%d,%s,%lx}",
- call->debug_id, rxrpc_call_states[call->state], call->events);
+ call->debug_id, rxrpc_call_states[__rxrpc_call_state(call)],
+ call->events);
- if (call->state == RXRPC_CALL_COMPLETE)
+ if (__rxrpc_call_is_complete(call))
goto out;
+ /* Handle abort request locklessly, vs rxrpc_propose_abort(). */
+ abort_code = smp_load_acquire(&call->send_abort);
+ if (abort_code) {
+ rxrpc_abort_call(call, 0, call->send_abort, call->send_abort_err,
+ call->send_abort_why);
+ goto out;
+ }
+
if (skb && skb->mark == RXRPC_SKB_MARK_ERROR)
goto out;
@@ -358,7 +407,7 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
}
t = READ_ONCE(call->expect_req_by);
- if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST &&
+ if (__rxrpc_call_state(call) == RXRPC_CALL_SERVER_RECV_REQUEST &&
time_after_eq(now, t)) {
trace_rxrpc_timer(call, rxrpc_timer_exp_idle, now);
expired = true;
@@ -429,11 +478,12 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) &&
(int)call->conn->hi_serial - (int)call->rx_serial > 0) {
trace_rxrpc_call_reset(call);
- rxrpc_abort_call("EXP", call, 0, RX_CALL_DEAD, -ECONNRESET);
+ rxrpc_abort_call(call, 0, RX_CALL_DEAD, -ECONNRESET,
+ rxrpc_abort_call_reset);
} else {
- rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME);
+ rxrpc_abort_call(call, 0, RX_CALL_TIMEOUT, -ETIME,
+ rxrpc_abort_call_timeout);
}
- rxrpc_send_abort_packet(call);
goto out;
}
@@ -441,7 +491,7 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
rxrpc_propose_ack_ping_for_lost_ack);
- if (resend && call->state != RXRPC_CALL_CLIENT_RECV_REPLY)
+ if (resend && __rxrpc_call_state(call) != RXRPC_CALL_CLIENT_RECV_REPLY)
rxrpc_resend(call, NULL);
if (test_and_clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags))
@@ -453,7 +503,7 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_propose_ack_input_data);
/* Make sure the timer is restarted */
- if (call->state != RXRPC_CALL_COMPLETE) {
+ if (!__rxrpc_call_is_complete(call)) {
next = call->expect_rx_by;
#define set(T) { t = READ_ONCE(T); if (time_before(t, next)) next = t; }
@@ -474,9 +524,15 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
}
out:
- if (call->state == RXRPC_CALL_COMPLETE)
+ if (__rxrpc_call_is_complete(call)) {
del_timer_sync(&call->timer);
+ if (!test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
+ rxrpc_disconnect_call(call);
+ if (call->security)
+ call->security->free_call_crypto(call);
+ }
if (call->acks_hard_ack != call->tx_bottom)
rxrpc_shrink_call_tx_buffer(call);
_leave("");
+ return true;
}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 89dcf60b1158..f3c9f0201c15 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -50,7 +50,7 @@ void rxrpc_poke_call(struct rxrpc_call *call, enum rxrpc_call_poke_trace what)
struct rxrpc_local *local = call->local;
bool busy;
- if (call->state < RXRPC_CALL_COMPLETE) {
+ if (!test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) {
spin_lock_bh(&local->lock);
busy = !list_empty(&call->attend_link);
trace_rxrpc_poke_call(call, busy, what);
@@ -69,7 +69,7 @@ static void rxrpc_call_timer_expired(struct timer_list *t)
_enter("%d", call->debug_id);
- if (call->state < RXRPC_CALL_COMPLETE) {
+ if (!__rxrpc_call_is_complete(call)) {
trace_rxrpc_timer_expired(call, jiffies);
rxrpc_poke_call(call, rxrpc_call_poke_timer);
}
@@ -150,7 +150,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
timer_setup(&call->timer, rxrpc_call_timer_expired, 0);
INIT_WORK(&call->destroyer, rxrpc_destroy_call);
INIT_LIST_HEAD(&call->link);
- INIT_LIST_HEAD(&call->chan_wait_link);
+ INIT_LIST_HEAD(&call->wait_link);
INIT_LIST_HEAD(&call->accept_link);
INIT_LIST_HEAD(&call->recvmsg_link);
INIT_LIST_HEAD(&call->sock_link);
@@ -162,7 +162,6 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
init_waitqueue_head(&call->waitq);
spin_lock_init(&call->notify_lock);
spin_lock_init(&call->tx_lock);
- rwlock_init(&call->state_lock);
refcount_set(&call->ref, 1);
call->debug_id = debug_id;
call->tx_total_len = -1;
@@ -211,7 +210,6 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
now = ktime_get_real();
call->acks_latest_ts = now;
call->cong_tstamp = now;
- call->state = RXRPC_CALL_CLIENT_AWAIT_CONN;
call->dest_srx = *srx;
call->interruptibility = p->interruptibility;
call->tx_total_len = p->tx_total_len;
@@ -227,11 +225,13 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
ret = rxrpc_init_client_call_security(call);
if (ret < 0) {
- __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret);
+ rxrpc_prefail_call(call, RXRPC_CALL_LOCAL_ERROR, ret);
rxrpc_put_call(call, rxrpc_call_put_discard_error);
return ERR_PTR(ret);
}
+ rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_AWAIT_CONN);
+
trace_rxrpc_call(call->debug_id, refcount_read(&call->ref),
p->user_call_ID, rxrpc_call_new_client);
@@ -242,7 +242,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
/*
* Initiate the call ack/resend/expiry timer.
*/
-static void rxrpc_start_call_timer(struct rxrpc_call *call)
+void rxrpc_start_call_timer(struct rxrpc_call *call)
{
unsigned long now = jiffies;
unsigned long j = now + MAX_JIFFY_OFFSET;
@@ -287,6 +287,39 @@ static void rxrpc_put_call_slot(struct rxrpc_call *call)
}
/*
+ * Start the process of connecting a call. We obtain a peer and a connection
+ * bundle, but the actual association of a call with a connection is offloaded
+ * to the I/O thread to simplify locking.
+ */
+static int rxrpc_connect_call(struct rxrpc_call *call, gfp_t gfp)
+{
+ struct rxrpc_local *local = call->local;
+ int ret = -ENOMEM;
+
+ _enter("{%d,%lx},", call->debug_id, call->user_call_ID);
+
+ call->peer = rxrpc_lookup_peer(local, &call->dest_srx, gfp);
+ if (!call->peer)
+ goto error;
+
+ ret = rxrpc_look_up_bundle(call, gfp);
+ if (ret < 0)
+ goto error;
+
+ trace_rxrpc_client(NULL, -1, rxrpc_client_queue_new_call);
+ rxrpc_get_call(call, rxrpc_call_get_io_thread);
+ spin_lock(&local->client_call_lock);
+ list_add_tail(&call->wait_link, &local->new_client_calls);
+ spin_unlock(&local->client_call_lock);
+ rxrpc_wake_up_io_thread(local);
+ return 0;
+
+error:
+ __set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
+ return ret;
+}
+
+/*
* Set up a call for the given parameters.
* - Called with the socket lock held, which it must release.
* - If it returns a call, the call's lock will need releasing by the caller.
@@ -365,14 +398,10 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
/* Set up or get a connection record and set the protocol parameters,
* including channel number and call ID.
*/
- ret = rxrpc_connect_call(rx, call, cp, srx, gfp);
+ ret = rxrpc_connect_call(call, gfp);
if (ret < 0)
goto error_attached_to_socket;
- rxrpc_see_call(call, rxrpc_call_see_connected);
-
- rxrpc_start_call_timer(call);
-
_leave(" = %p [new]", call);
return call;
@@ -384,27 +413,23 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
error_dup_user_ID:
write_unlock(&rx->call_lock);
release_sock(&rx->sk);
- __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
- RX_CALL_DEAD, -EEXIST);
+ rxrpc_prefail_call(call, RXRPC_CALL_LOCAL_ERROR, -EEXIST);
trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), 0,
rxrpc_call_see_userid_exists);
- rxrpc_release_call(rx, call);
mutex_unlock(&call->user_mutex);
rxrpc_put_call(call, rxrpc_call_put_userid_exists);
_leave(" = -EEXIST");
return ERR_PTR(-EEXIST);
/* We got an error, but the call is attached to the socket and is in
- * need of release. However, we might now race with recvmsg() when
- * completing the call queues it. Return 0 from sys_sendmsg() and
+ * need of release. However, we might now race with recvmsg() when it
+ * completion notifies the socket. Return 0 from sys_sendmsg() and
* leave the error to recvmsg() to deal with.
*/
error_attached_to_socket:
trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), ret,
rxrpc_call_see_connect_failed);
- set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
- __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
- RX_CALL_DEAD, ret);
+ rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret);
_leave(" = c=%08x [err]", call->debug_id);
return call;
}
@@ -427,32 +452,32 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx,
call->call_id = sp->hdr.callNumber;
call->dest_srx.srx_service = sp->hdr.serviceId;
call->cid = sp->hdr.cid;
- call->state = RXRPC_CALL_SERVER_SECURING;
call->cong_tstamp = skb->tstamp;
+ __set_bit(RXRPC_CALL_EXPOSED, &call->flags);
+ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING);
+
spin_lock(&conn->state_lock);
switch (conn->state) {
case RXRPC_CONN_SERVICE_UNSECURED:
case RXRPC_CONN_SERVICE_CHALLENGING:
- call->state = RXRPC_CALL_SERVER_SECURING;
+ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING);
break;
case RXRPC_CONN_SERVICE:
- call->state = RXRPC_CALL_SERVER_RECV_REQUEST;
+ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST);
break;
- case RXRPC_CONN_REMOTELY_ABORTED:
- __rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
- conn->abort_code, conn->error);
- break;
- case RXRPC_CONN_LOCALLY_ABORTED:
- __rxrpc_abort_call("CON", call, 1,
- conn->abort_code, conn->error);
+ case RXRPC_CONN_ABORTED:
+ rxrpc_set_call_completion(call, conn->completion,
+ conn->abort_code, conn->error);
break;
default:
BUG();
}
+ rxrpc_get_call(call, rxrpc_call_get_io_thread);
+
/* Set the channel for this call. We don't get channel_lock as we're
* only defending against the data_ready handler (which we're called
* from) and the RESPONSE packet parser (which is only really
@@ -462,7 +487,7 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx,
chan = sp->hdr.cid & RXRPC_CHANNELMASK;
conn->channels[chan].call_counter = call->call_id;
conn->channels[chan].call_id = call->call_id;
- rcu_assign_pointer(conn->channels[chan].call, call);
+ conn->channels[chan].call = call;
spin_unlock(&conn->state_lock);
spin_lock(&conn->peer->lock);
@@ -522,20 +547,17 @@ static void rxrpc_cleanup_ring(struct rxrpc_call *call)
void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
{
struct rxrpc_connection *conn = call->conn;
- bool put = false;
+ bool put = false, putu = false;
_enter("{%d,%d}", call->debug_id, refcount_read(&call->ref));
trace_rxrpc_call(call->debug_id, refcount_read(&call->ref),
call->flags, rxrpc_call_see_release);
- ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
-
if (test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags))
BUG();
rxrpc_put_call_slot(call);
- del_timer_sync(&call->timer);
/* Make sure we don't get any more notifications */
write_lock(&rx->recvmsg_lock);
@@ -560,7 +582,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
if (test_and_clear_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
rb_erase(&call->sock_node, &rx->calls);
memset(&call->sock_node, 0xdd, sizeof(call->sock_node));
- rxrpc_put_call(call, rxrpc_call_put_userid_exists);
+ putu = true;
}
list_del(&call->sock_link);
@@ -568,10 +590,9 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
_debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn);
- if (conn && !test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
- rxrpc_disconnect_call(call);
- if (call->security)
- call->security->free_call_crypto(call);
+ if (putu)
+ rxrpc_put_call(call, rxrpc_call_put_userid);
+
_leave("");
}
@@ -588,7 +609,8 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
call = list_entry(rx->to_be_accepted.next,
struct rxrpc_call, accept_link);
list_del(&call->accept_link);
- rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, -ECONNRESET);
+ rxrpc_propose_abort(call, RX_CALL_DEAD, -ECONNRESET,
+ rxrpc_abort_call_sock_release_tba);
rxrpc_put_call(call, rxrpc_call_put_release_sock_tba);
}
@@ -596,8 +618,8 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
call = list_entry(rx->sock_calls.next,
struct rxrpc_call, sock_link);
rxrpc_get_call(call, rxrpc_call_get_release_sock);
- rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, -ECONNRESET);
- rxrpc_send_abort_packet(call);
+ rxrpc_propose_abort(call, RX_CALL_DEAD, -ECONNRESET,
+ rxrpc_abort_call_sock_release);
rxrpc_release_call(rx, call);
rxrpc_put_call(call, rxrpc_call_put_release_sock);
}
@@ -620,7 +642,7 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace why)
dead = __refcount_dec_and_test(&call->ref, &r);
trace_rxrpc_call(debug_id, r - 1, 0, why);
if (dead) {
- ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
+ ASSERTCMP(__rxrpc_call_state(call), ==, RXRPC_CALL_COMPLETE);
if (!list_empty(&call->link)) {
spin_lock(&rxnet->call_lock);
@@ -669,6 +691,8 @@ static void rxrpc_destroy_call(struct work_struct *work)
rxrpc_put_txbuf(call->tx_pending, rxrpc_txbuf_put_cleaned);
rxrpc_put_connection(call->conn, rxrpc_conn_put_call);
+ rxrpc_deactivate_bundle(call->bundle);
+ rxrpc_put_bundle(call->bundle, rxrpc_bundle_put_call);
rxrpc_put_peer(call->peer, rxrpc_peer_put_call);
rxrpc_put_local(call->local, rxrpc_local_put_call);
call_rcu(&call->rcu, rxrpc_rcu_free_call);
@@ -681,7 +705,7 @@ void rxrpc_cleanup_call(struct rxrpc_call *call)
{
memset(&call->sock_node, 0xcd, sizeof(call->sock_node));
- ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
+ ASSERTCMP(__rxrpc_call_state(call), ==, RXRPC_CALL_COMPLETE);
ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags));
del_timer(&call->timer);
@@ -719,7 +743,7 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet)
pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n",
call, refcount_read(&call->ref),
- rxrpc_call_states[call->state],
+ rxrpc_call_states[__rxrpc_call_state(call)],
call->flags, call->events);
spin_unlock(&rxnet->call_lock);
diff --git a/net/rxrpc/call_state.c b/net/rxrpc/call_state.c
new file mode 100644
index 000000000000..6afb54373ebb
--- /dev/null
+++ b/net/rxrpc/call_state.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Call state changing functions.
+ *
+ * Copyright (C) 2022 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include "ar-internal.h"
+
+/*
+ * Transition a call to the complete state.
+ */
+bool rxrpc_set_call_completion(struct rxrpc_call *call,
+ enum rxrpc_call_completion compl,
+ u32 abort_code,
+ int error)
+{
+ if (__rxrpc_call_state(call) == RXRPC_CALL_COMPLETE)
+ return false;
+
+ call->abort_code = abort_code;
+ call->error = error;
+ call->completion = compl;
+ /* Allow reader of completion state to operate locklessly */
+ rxrpc_set_call_state(call, RXRPC_CALL_COMPLETE);
+ trace_rxrpc_call_complete(call);
+ wake_up(&call->waitq);
+ rxrpc_notify_socket(call);
+ return true;
+}
+
+/*
+ * Record that a call successfully completed.
+ */
+bool rxrpc_call_completed(struct rxrpc_call *call)
+{
+ return rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0);
+}
+
+/*
+ * Record that a call is locally aborted.
+ */
+bool rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq,
+ u32 abort_code, int error, enum rxrpc_abort_reason why)
+{
+ trace_rxrpc_abort(call->debug_id, why, call->cid, call->call_id, seq,
+ abort_code, error);
+ if (!rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED,
+ abort_code, error))
+ return false;
+ if (test_bit(RXRPC_CALL_EXPOSED, &call->flags))
+ rxrpc_send_abort_packet(call);
+ return true;
+}
+
+/*
+ * Record that a call errored out before even getting off the ground, thereby
+ * setting the state to allow it to be destroyed.
+ */
+void rxrpc_prefail_call(struct rxrpc_call *call, enum rxrpc_call_completion compl,
+ int error)
+{
+ call->abort_code = RX_CALL_DEAD;
+ call->error = error;
+ call->completion = compl;
+ call->_state = RXRPC_CALL_COMPLETE;
+ trace_rxrpc_call_complete(call);
+ WARN_ON_ONCE(__test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags));
+}
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 87efa0373aed..981ca5b98bcb 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -34,104 +34,59 @@ __read_mostly unsigned int rxrpc_reap_client_connections = 900;
__read_mostly unsigned long rxrpc_conn_idle_client_expiry = 2 * 60 * HZ;
__read_mostly unsigned long rxrpc_conn_idle_client_fast_expiry = 2 * HZ;
-/*
- * We use machine-unique IDs for our client connections.
- */
-DEFINE_IDR(rxrpc_client_conn_ids);
-static DEFINE_SPINLOCK(rxrpc_conn_id_lock);
-
-static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle);
-
-/*
- * Get a connection ID and epoch for a client connection from the global pool.
- * The connection struct pointer is then recorded in the idr radix tree. The
- * epoch doesn't change until the client is rebooted (or, at least, unless the
- * module is unloaded).
- */
-static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn,
- gfp_t gfp)
+static void rxrpc_activate_bundle(struct rxrpc_bundle *bundle)
{
- struct rxrpc_net *rxnet = conn->rxnet;
- int id;
-
- _enter("");
-
- idr_preload(gfp);
- spin_lock(&rxrpc_conn_id_lock);
-
- id = idr_alloc_cyclic(&rxrpc_client_conn_ids, conn,
- 1, 0x40000000, GFP_NOWAIT);
- if (id < 0)
- goto error;
-
- spin_unlock(&rxrpc_conn_id_lock);
- idr_preload_end();
-
- conn->proto.epoch = rxnet->epoch;
- conn->proto.cid = id << RXRPC_CIDSHIFT;
- set_bit(RXRPC_CONN_HAS_IDR, &conn->flags);
- _leave(" [CID %x]", conn->proto.cid);
- return 0;
-
-error:
- spin_unlock(&rxrpc_conn_id_lock);
- idr_preload_end();
- _leave(" = %d", id);
- return id;
+ atomic_inc(&bundle->active);
}
/*
- * Release a connection ID for a client connection from the global pool.
+ * Release a connection ID for a client connection.
*/
-static void rxrpc_put_client_connection_id(struct rxrpc_connection *conn)
+static void rxrpc_put_client_connection_id(struct rxrpc_local *local,
+ struct rxrpc_connection *conn)
{
- if (test_bit(RXRPC_CONN_HAS_IDR, &conn->flags)) {
- spin_lock(&rxrpc_conn_id_lock);
- idr_remove(&rxrpc_client_conn_ids,
- conn->proto.cid >> RXRPC_CIDSHIFT);
- spin_unlock(&rxrpc_conn_id_lock);
- }
+ idr_remove(&local->conn_ids, conn->proto.cid >> RXRPC_CIDSHIFT);
}
/*
* Destroy the client connection ID tree.
*/
-void rxrpc_destroy_client_conn_ids(void)
+static void rxrpc_destroy_client_conn_ids(struct rxrpc_local *local)
{
struct rxrpc_connection *conn;
int id;
- if (!idr_is_empty(&rxrpc_client_conn_ids)) {
- idr_for_each_entry(&rxrpc_client_conn_ids, conn, id) {
+ if (!idr_is_empty(&local->conn_ids)) {
+ idr_for_each_entry(&local->conn_ids, conn, id) {
pr_err("AF_RXRPC: Leaked client conn %p {%d}\n",
conn, refcount_read(&conn->ref));
}
BUG();
}
- idr_destroy(&rxrpc_client_conn_ids);
+ idr_destroy(&local->conn_ids);
}
/*
* Allocate a connection bundle.
*/
-static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_conn_parameters *cp,
+static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_call *call,
gfp_t gfp)
{
struct rxrpc_bundle *bundle;
bundle = kzalloc(sizeof(*bundle), gfp);
if (bundle) {
- bundle->local = cp->local;
- bundle->peer = rxrpc_get_peer(cp->peer, rxrpc_peer_get_bundle);
- bundle->key = cp->key;
- bundle->exclusive = cp->exclusive;
- bundle->upgrade = cp->upgrade;
- bundle->service_id = cp->service_id;
- bundle->security_level = cp->security_level;
+ bundle->local = call->local;
+ bundle->peer = rxrpc_get_peer(call->peer, rxrpc_peer_get_bundle);
+ bundle->key = key_get(call->key);
+ bundle->security = call->security;
+ bundle->exclusive = test_bit(RXRPC_CALL_EXCLUSIVE, &call->flags);
+ bundle->upgrade = test_bit(RXRPC_CALL_UPGRADE, &call->flags);
+ bundle->service_id = call->dest_srx.srx_service;
+ bundle->security_level = call->security_level;
refcount_set(&bundle->ref, 1);
atomic_set(&bundle->active, 1);
- spin_lock_init(&bundle->channel_lock);
INIT_LIST_HEAD(&bundle->waiting_calls);
trace_rxrpc_bundle(bundle->debug_id, 1, rxrpc_bundle_new);
}
@@ -152,84 +107,87 @@ static void rxrpc_free_bundle(struct rxrpc_bundle *bundle)
{
trace_rxrpc_bundle(bundle->debug_id, 1, rxrpc_bundle_free);
rxrpc_put_peer(bundle->peer, rxrpc_peer_put_bundle);
+ key_put(bundle->key);
kfree(bundle);
}
void rxrpc_put_bundle(struct rxrpc_bundle *bundle, enum rxrpc_bundle_trace why)
{
- unsigned int id = bundle->debug_id;
+ unsigned int id;
bool dead;
int r;
- dead = __refcount_dec_and_test(&bundle->ref, &r);
- trace_rxrpc_bundle(id, r - 1, why);
- if (dead)
- rxrpc_free_bundle(bundle);
+ if (bundle) {
+ id = bundle->debug_id;
+ dead = __refcount_dec_and_test(&bundle->ref, &r);
+ trace_rxrpc_bundle(id, r - 1, why);
+ if (dead)
+ rxrpc_free_bundle(bundle);
+ }
+}
+
+/*
+ * Get rid of outstanding client connection preallocations when a local
+ * endpoint is destroyed.
+ */
+void rxrpc_purge_client_connections(struct rxrpc_local *local)
+{
+ rxrpc_destroy_client_conn_ids(local);
}
/*
* Allocate a client connection.
*/
static struct rxrpc_connection *
-rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp)
+rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle)
{
struct rxrpc_connection *conn;
- struct rxrpc_net *rxnet = bundle->local->rxnet;
- int ret;
+ struct rxrpc_local *local = bundle->local;
+ struct rxrpc_net *rxnet = local->rxnet;
+ int id;
_enter("");
- conn = rxrpc_alloc_connection(rxnet, gfp);
- if (!conn) {
- _leave(" = -ENOMEM");
+ conn = rxrpc_alloc_connection(rxnet, GFP_ATOMIC | __GFP_NOWARN);
+ if (!conn)
return ERR_PTR(-ENOMEM);
+
+ id = idr_alloc_cyclic(&local->conn_ids, conn, 1, 0x40000000,
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (id < 0) {
+ kfree(conn);
+ return ERR_PTR(id);
}
refcount_set(&conn->ref, 1);
- conn->bundle = bundle;
- conn->local = bundle->local;
- conn->peer = bundle->peer;
- conn->key = bundle->key;
+ conn->proto.cid = id << RXRPC_CIDSHIFT;
+ conn->proto.epoch = local->rxnet->epoch;
+ conn->out_clientflag = RXRPC_CLIENT_INITIATED;
+ conn->bundle = rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_conn);
+ conn->local = rxrpc_get_local(bundle->local, rxrpc_local_get_client_conn);
+ conn->peer = rxrpc_get_peer(bundle->peer, rxrpc_peer_get_client_conn);
+ conn->key = key_get(bundle->key);
+ conn->security = bundle->security;
conn->exclusive = bundle->exclusive;
conn->upgrade = bundle->upgrade;
conn->orig_service_id = bundle->service_id;
conn->security_level = bundle->security_level;
- conn->out_clientflag = RXRPC_CLIENT_INITIATED;
- conn->state = RXRPC_CONN_CLIENT;
+ conn->state = RXRPC_CONN_CLIENT_UNSECURED;
conn->service_id = conn->orig_service_id;
- ret = rxrpc_get_client_connection_id(conn, gfp);
- if (ret < 0)
- goto error_0;
-
- ret = rxrpc_init_client_conn_security(conn);
- if (ret < 0)
- goto error_1;
+ if (conn->security == &rxrpc_no_security)
+ conn->state = RXRPC_CONN_CLIENT;
atomic_inc(&rxnet->nr_conns);
write_lock(&rxnet->conn_lock);
list_add_tail(&conn->proc_link, &rxnet->conn_proc_list);
write_unlock(&rxnet->conn_lock);
- rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_conn);
- rxrpc_get_peer(conn->peer, rxrpc_peer_get_client_conn);
- rxrpc_get_local(conn->local, rxrpc_local_get_client_conn);
- key_get(conn->key);
-
- trace_rxrpc_conn(conn->debug_id, refcount_read(&conn->ref),
- rxrpc_conn_new_client);
+ rxrpc_see_connection(conn, rxrpc_conn_new_client);
atomic_inc(&rxnet->nr_client_conns);
trace_rxrpc_client(conn, -1, rxrpc_client_alloc);
- _leave(" = %p", conn);
return conn;
-
-error_1:
- rxrpc_put_client_connection_id(conn);
-error_0:
- kfree(conn);
- _leave(" = %d", ret);
- return ERR_PTR(ret);
}
/*
@@ -247,7 +205,8 @@ static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn)
if (test_bit(RXRPC_CONN_DONT_REUSE, &conn->flags))
goto dont_reuse;
- if (conn->state != RXRPC_CONN_CLIENT ||
+ if ((conn->state != RXRPC_CONN_CLIENT_UNSECURED &&
+ conn->state != RXRPC_CONN_CLIENT) ||
conn->proto.epoch != rxnet->epoch)
goto mark_dont_reuse;
@@ -257,7 +216,7 @@ static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn)
* times the maximum number of client conns away from the current
* allocation point to try and keep the IDs concentrated.
*/
- id_cursor = idr_get_cursor(&rxrpc_client_conn_ids);
+ id_cursor = idr_get_cursor(&conn->local->conn_ids);
id = conn->proto.cid >> RXRPC_CIDSHIFT;
distance = id - id_cursor;
if (distance < 0)
@@ -278,20 +237,23 @@ dont_reuse:
* Look up the conn bundle that matches the connection parameters, adding it if
* it doesn't yet exist.
*/
-static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *cp,
- gfp_t gfp)
+int rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t gfp)
{
static atomic_t rxrpc_bundle_id;
struct rxrpc_bundle *bundle, *candidate;
- struct rxrpc_local *local = cp->local;
+ struct rxrpc_local *local = call->local;
struct rb_node *p, **pp, *parent;
long diff;
+ bool upgrade = test_bit(RXRPC_CALL_UPGRADE, &call->flags);
_enter("{%px,%x,%u,%u}",
- cp->peer, key_serial(cp->key), cp->security_level, cp->upgrade);
+ call->peer, key_serial(call->key), call->security_level,
+ upgrade);
- if (cp->exclusive)
- return rxrpc_alloc_bundle(cp, gfp);
+ if (test_bit(RXRPC_CALL_EXCLUSIVE, &call->flags)) {
+ call->bundle = rxrpc_alloc_bundle(call, gfp);
+ return call->bundle ? 0 : -ENOMEM;
+ }
/* First, see if the bundle is already there. */
_debug("search 1");
@@ -300,11 +262,11 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c
while (p) {
bundle = rb_entry(p, struct rxrpc_bundle, local_node);
-#define cmp(X) ((long)bundle->X - (long)cp->X)
- diff = (cmp(peer) ?:
- cmp(key) ?:
- cmp(security_level) ?:
- cmp(upgrade));
+#define cmp(X, Y) ((long)(X) - (long)(Y))
+ diff = (cmp(bundle->peer, call->peer) ?:
+ cmp(bundle->key, call->key) ?:
+ cmp(bundle->security_level, call->security_level) ?:
+ cmp(bundle->upgrade, upgrade));
#undef cmp
if (diff < 0)
p = p->rb_left;
@@ -317,9 +279,9 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c
_debug("not found");
/* It wasn't. We need to add one. */
- candidate = rxrpc_alloc_bundle(cp, gfp);
+ candidate = rxrpc_alloc_bundle(call, gfp);
if (!candidate)
- return NULL;
+ return -ENOMEM;
_debug("search 2");
spin_lock(&local->client_bundles_lock);
@@ -329,11 +291,11 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c
parent = *pp;
bundle = rb_entry(parent, struct rxrpc_bundle, local_node);
-#define cmp(X) ((long)bundle->X - (long)cp->X)
- diff = (cmp(peer) ?:
- cmp(key) ?:
- cmp(security_level) ?:
- cmp(upgrade));
+#define cmp(X, Y) ((long)(X) - (long)(Y))
+ diff = (cmp(bundle->peer, call->peer) ?:
+ cmp(bundle->key, call->key) ?:
+ cmp(bundle->security_level, call->security_level) ?:
+ cmp(bundle->upgrade, upgrade));
#undef cmp
if (diff < 0)
pp = &(*pp)->rb_left;
@@ -347,178 +309,89 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c
candidate->debug_id = atomic_inc_return(&rxrpc_bundle_id);
rb_link_node(&candidate->local_node, parent, pp);
rb_insert_color(&candidate->local_node, &local->client_bundles);
- rxrpc_get_bundle(candidate, rxrpc_bundle_get_client_call);
+ call->bundle = rxrpc_get_bundle(candidate, rxrpc_bundle_get_client_call);
spin_unlock(&local->client_bundles_lock);
- _leave(" = %u [new]", candidate->debug_id);
- return candidate;
+ _leave(" = B=%u [new]", call->bundle->debug_id);
+ return 0;
found_bundle_free:
rxrpc_free_bundle(candidate);
found_bundle:
- rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_call);
- atomic_inc(&bundle->active);
+ call->bundle = rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_call);
+ rxrpc_activate_bundle(bundle);
spin_unlock(&local->client_bundles_lock);
- _leave(" = %u [found]", bundle->debug_id);
- return bundle;
-}
-
-/*
- * Create or find a client bundle to use for a call.
- *
- * If we return with a connection, the call will be on its waiting list. It's
- * left to the caller to assign a channel and wake up the call.
- */
-static struct rxrpc_bundle *rxrpc_prep_call(struct rxrpc_sock *rx,
- struct rxrpc_call *call,
- struct rxrpc_conn_parameters *cp,
- struct sockaddr_rxrpc *srx,
- gfp_t gfp)
-{
- struct rxrpc_bundle *bundle;
-
- _enter("{%d,%lx},", call->debug_id, call->user_call_ID);
-
- cp->peer = rxrpc_lookup_peer(rx, cp->local, srx, gfp);
- if (!cp->peer)
- goto error;
-
- call->tx_last_sent = ktime_get_real();
- call->cong_ssthresh = cp->peer->cong_ssthresh;
- if (call->cong_cwnd >= call->cong_ssthresh)
- call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
- else
- call->cong_mode = RXRPC_CALL_SLOW_START;
- if (cp->upgrade)
- __set_bit(RXRPC_CALL_UPGRADE, &call->flags);
-
- /* Find the client connection bundle. */
- bundle = rxrpc_look_up_bundle(cp, gfp);
- if (!bundle)
- goto error;
-
- /* Get this call queued. Someone else may activate it whilst we're
- * lining up a new connection, but that's fine.
- */
- spin_lock(&bundle->channel_lock);
- list_add_tail(&call->chan_wait_link, &bundle->waiting_calls);
- spin_unlock(&bundle->channel_lock);
-
- _leave(" = [B=%x]", bundle->debug_id);
- return bundle;
-
-error:
- _leave(" = -ENOMEM");
- return ERR_PTR(-ENOMEM);
+ _leave(" = B=%u [found]", call->bundle->debug_id);
+ return 0;
}
/*
* Allocate a new connection and add it into a bundle.
*/
-static void rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle, gfp_t gfp)
- __releases(bundle->channel_lock)
+static bool rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle,
+ unsigned int slot)
{
- struct rxrpc_connection *candidate = NULL, *old = NULL;
- bool conflict;
- int i;
-
- _enter("");
-
- conflict = bundle->alloc_conn;
- if (!conflict)
- bundle->alloc_conn = true;
- spin_unlock(&bundle->channel_lock);
- if (conflict) {
- _leave(" [conf]");
- return;
- }
-
- candidate = rxrpc_alloc_client_connection(bundle, gfp);
-
- spin_lock(&bundle->channel_lock);
- bundle->alloc_conn = false;
-
- if (IS_ERR(candidate)) {
- bundle->alloc_error = PTR_ERR(candidate);
- spin_unlock(&bundle->channel_lock);
- _leave(" [err %ld]", PTR_ERR(candidate));
- return;
- }
-
- bundle->alloc_error = 0;
-
- for (i = 0; i < ARRAY_SIZE(bundle->conns); i++) {
- unsigned int shift = i * RXRPC_MAXCALLS;
- int j;
-
- old = bundle->conns[i];
- if (!rxrpc_may_reuse_conn(old)) {
- if (old)
- trace_rxrpc_client(old, -1, rxrpc_client_replace);
- candidate->bundle_shift = shift;
- atomic_inc(&bundle->active);
- bundle->conns[i] = candidate;
- for (j = 0; j < RXRPC_MAXCALLS; j++)
- set_bit(shift + j, &bundle->avail_chans);
- candidate = NULL;
- break;
- }
+ struct rxrpc_connection *conn, *old;
+ unsigned int shift = slot * RXRPC_MAXCALLS;
+ unsigned int i;
- old = NULL;
+ old = bundle->conns[slot];
+ if (old) {
+ bundle->conns[slot] = NULL;
+ trace_rxrpc_client(old, -1, rxrpc_client_replace);
+ rxrpc_put_connection(old, rxrpc_conn_put_noreuse);
}
- spin_unlock(&bundle->channel_lock);
-
- if (candidate) {
- _debug("discard C=%x", candidate->debug_id);
- trace_rxrpc_client(candidate, -1, rxrpc_client_duplicate);
- rxrpc_put_connection(candidate, rxrpc_conn_put_discard);
+ conn = rxrpc_alloc_client_connection(bundle);
+ if (IS_ERR(conn)) {
+ bundle->alloc_error = PTR_ERR(conn);
+ return false;
}
- rxrpc_put_connection(old, rxrpc_conn_put_noreuse);
- _leave("");
+ rxrpc_activate_bundle(bundle);
+ conn->bundle_shift = shift;
+ bundle->conns[slot] = conn;
+ for (i = 0; i < RXRPC_MAXCALLS; i++)
+ set_bit(shift + i, &bundle->avail_chans);
+ return true;
}
/*
* Add a connection to a bundle if there are no usable connections or we have
* connections waiting for extra capacity.
*/
-static void rxrpc_maybe_add_conn(struct rxrpc_bundle *bundle, gfp_t gfp)
+static bool rxrpc_bundle_has_space(struct rxrpc_bundle *bundle)
{
- struct rxrpc_call *call;
- int i, usable;
+ int slot = -1, i, usable;
_enter("");
- spin_lock(&bundle->channel_lock);
+ bundle->alloc_error = 0;
/* See if there are any usable connections. */
usable = 0;
- for (i = 0; i < ARRAY_SIZE(bundle->conns); i++)
+ for (i = 0; i < ARRAY_SIZE(bundle->conns); i++) {
if (rxrpc_may_reuse_conn(bundle->conns[i]))
usable++;
-
- if (!usable && !list_empty(&bundle->waiting_calls)) {
- call = list_first_entry(&bundle->waiting_calls,
- struct rxrpc_call, chan_wait_link);
- if (test_bit(RXRPC_CALL_UPGRADE, &call->flags))
- bundle->try_upgrade = true;
+ else if (slot == -1)
+ slot = i;
}
+ if (!usable && bundle->upgrade)
+ bundle->try_upgrade = true;
+
if (!usable)
goto alloc_conn;
if (!bundle->avail_chans &&
!bundle->try_upgrade &&
- !list_empty(&bundle->waiting_calls) &&
usable < ARRAY_SIZE(bundle->conns))
goto alloc_conn;
- spin_unlock(&bundle->channel_lock);
_leave("");
- return;
+ return usable;
alloc_conn:
- return rxrpc_add_conn_to_bundle(bundle, gfp);
+ return slot >= 0 ? rxrpc_add_conn_to_bundle(bundle, slot) : false;
}
/*
@@ -532,11 +405,13 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
struct rxrpc_channel *chan = &conn->channels[channel];
struct rxrpc_bundle *bundle = conn->bundle;
struct rxrpc_call *call = list_entry(bundle->waiting_calls.next,
- struct rxrpc_call, chan_wait_link);
+ struct rxrpc_call, wait_link);
u32 call_id = chan->call_counter + 1;
_enter("C=%x,%u", conn->debug_id, channel);
+ list_del_init(&call->wait_link);
+
trace_rxrpc_client(conn, channel, rxrpc_client_chan_activate);
/* Cancel the final ACK on the previous call if it hasn't been sent yet
@@ -546,68 +421,50 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
clear_bit(conn->bundle_shift + channel, &bundle->avail_chans);
rxrpc_see_call(call, rxrpc_call_see_activate_client);
- list_del_init(&call->chan_wait_link);
- call->peer = rxrpc_get_peer(conn->peer, rxrpc_peer_get_activate_call);
call->conn = rxrpc_get_connection(conn, rxrpc_conn_get_activate_call);
call->cid = conn->proto.cid | channel;
call->call_id = call_id;
call->dest_srx.srx_service = conn->service_id;
-
- trace_rxrpc_connect_call(call);
-
- write_lock(&call->state_lock);
- call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
- write_unlock(&call->state_lock);
-
- /* Paired with the read barrier in rxrpc_connect_call(). This orders
- * cid and epoch in the connection wrt to call_id without the need to
- * take the channel_lock.
- *
- * We provisionally assign a callNumber at this point, but we don't
- * confirm it until the call is about to be exposed.
- *
- * TODO: Pair with a barrier in the data_ready handler when that looks
- * at the call ID through a connection channel.
- */
- smp_wmb();
+ call->cong_ssthresh = call->peer->cong_ssthresh;
+ if (call->cong_cwnd >= call->cong_ssthresh)
+ call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
+ else
+ call->cong_mode = RXRPC_CALL_SLOW_START;
chan->call_id = call_id;
chan->call_debug_id = call->debug_id;
- rcu_assign_pointer(chan->call, call);
+ chan->call = call;
+
+ rxrpc_see_call(call, rxrpc_call_see_connected);
+ trace_rxrpc_connect_call(call);
+ call->tx_last_sent = ktime_get_real();
+ rxrpc_start_call_timer(call);
+ rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_SEND_REQUEST);
wake_up(&call->waitq);
}
/*
* Remove a connection from the idle list if it's on it.
*/
-static void rxrpc_unidle_conn(struct rxrpc_bundle *bundle, struct rxrpc_connection *conn)
+static void rxrpc_unidle_conn(struct rxrpc_connection *conn)
{
- struct rxrpc_net *rxnet = bundle->local->rxnet;
- bool drop_ref;
-
if (!list_empty(&conn->cache_link)) {
- drop_ref = false;
- spin_lock(&rxnet->client_conn_cache_lock);
- if (!list_empty(&conn->cache_link)) {
- list_del_init(&conn->cache_link);
- drop_ref = true;
- }
- spin_unlock(&rxnet->client_conn_cache_lock);
- if (drop_ref)
- rxrpc_put_connection(conn, rxrpc_conn_put_unidle);
+ list_del_init(&conn->cache_link);
+ rxrpc_put_connection(conn, rxrpc_conn_put_unidle);
}
}
/*
- * Assign channels and callNumbers to waiting calls with channel_lock
- * held by caller.
+ * Assign channels and callNumbers to waiting calls.
*/
-static void rxrpc_activate_channels_locked(struct rxrpc_bundle *bundle)
+static void rxrpc_activate_channels(struct rxrpc_bundle *bundle)
{
struct rxrpc_connection *conn;
unsigned long avail, mask;
unsigned int channel, slot;
+ trace_rxrpc_client(NULL, -1, rxrpc_client_activate_chans);
+
if (bundle->try_upgrade)
mask = 1;
else
@@ -627,7 +484,7 @@ static void rxrpc_activate_channels_locked(struct rxrpc_bundle *bundle)
if (bundle->try_upgrade)
set_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags);
- rxrpc_unidle_conn(bundle, conn);
+ rxrpc_unidle_conn(conn);
channel &= (RXRPC_MAXCALLS - 1);
conn->act_chans |= 1 << channel;
@@ -636,132 +493,24 @@ static void rxrpc_activate_channels_locked(struct rxrpc_bundle *bundle)
}
/*
- * Assign channels and callNumbers to waiting calls.
- */
-static void rxrpc_activate_channels(struct rxrpc_bundle *bundle)
-{
- _enter("B=%x", bundle->debug_id);
-
- trace_rxrpc_client(NULL, -1, rxrpc_client_activate_chans);
-
- if (!bundle->avail_chans)
- return;
-
- spin_lock(&bundle->channel_lock);
- rxrpc_activate_channels_locked(bundle);
- spin_unlock(&bundle->channel_lock);
- _leave("");
-}
-
-/*
- * Wait for a callNumber and a channel to be granted to a call.
- */
-static int rxrpc_wait_for_channel(struct rxrpc_bundle *bundle,
- struct rxrpc_call *call, gfp_t gfp)
-{
- DECLARE_WAITQUEUE(myself, current);
- int ret = 0;
-
- _enter("%d", call->debug_id);
-
- if (!gfpflags_allow_blocking(gfp)) {
- rxrpc_maybe_add_conn(bundle, gfp);
- rxrpc_activate_channels(bundle);
- ret = bundle->alloc_error ?: -EAGAIN;
- goto out;
- }
-
- add_wait_queue_exclusive(&call->waitq, &myself);
- for (;;) {
- rxrpc_maybe_add_conn(bundle, gfp);
- rxrpc_activate_channels(bundle);
- ret = bundle->alloc_error;
- if (ret < 0)
- break;
-
- switch (call->interruptibility) {
- case RXRPC_INTERRUPTIBLE:
- case RXRPC_PREINTERRUPTIBLE:
- set_current_state(TASK_INTERRUPTIBLE);
- break;
- case RXRPC_UNINTERRUPTIBLE:
- default:
- set_current_state(TASK_UNINTERRUPTIBLE);
- break;
- }
- if (READ_ONCE(call->state) != RXRPC_CALL_CLIENT_AWAIT_CONN)
- break;
- if ((call->interruptibility == RXRPC_INTERRUPTIBLE ||
- call->interruptibility == RXRPC_PREINTERRUPTIBLE) &&
- signal_pending(current)) {
- ret = -ERESTARTSYS;
- break;
- }
- schedule();
- }
- remove_wait_queue(&call->waitq, &myself);
- __set_current_state(TASK_RUNNING);
-
-out:
- _leave(" = %d", ret);
- return ret;
-}
-
-/*
- * find a connection for a call
- * - called in process context with IRQs enabled
+ * Connect waiting channels (called from the I/O thread).
*/
-int rxrpc_connect_call(struct rxrpc_sock *rx,
- struct rxrpc_call *call,
- struct rxrpc_conn_parameters *cp,
- struct sockaddr_rxrpc *srx,
- gfp_t gfp)
+void rxrpc_connect_client_calls(struct rxrpc_local *local)
{
- struct rxrpc_bundle *bundle;
- struct rxrpc_net *rxnet = cp->local->rxnet;
- int ret = 0;
-
- _enter("{%d,%lx},", call->debug_id, call->user_call_ID);
-
- rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper);
+ struct rxrpc_call *call;
- bundle = rxrpc_prep_call(rx, call, cp, srx, gfp);
- if (IS_ERR(bundle)) {
- ret = PTR_ERR(bundle);
- goto out;
- }
+ while ((call = list_first_entry_or_null(&local->new_client_calls,
+ struct rxrpc_call, wait_link))
+ ) {
+ struct rxrpc_bundle *bundle = call->bundle;
- if (call->state == RXRPC_CALL_CLIENT_AWAIT_CONN) {
- ret = rxrpc_wait_for_channel(bundle, call, gfp);
- if (ret < 0)
- goto wait_failed;
- }
+ spin_lock(&local->client_call_lock);
+ list_move_tail(&call->wait_link, &bundle->waiting_calls);
+ spin_unlock(&local->client_call_lock);
-granted_channel:
- /* Paired with the write barrier in rxrpc_activate_one_channel(). */
- smp_rmb();
-
-out_put_bundle:
- rxrpc_deactivate_bundle(bundle);
- rxrpc_put_bundle(bundle, rxrpc_bundle_get_client_call);
-out:
- _leave(" = %d", ret);
- return ret;
-
-wait_failed:
- spin_lock(&bundle->channel_lock);
- list_del_init(&call->chan_wait_link);
- spin_unlock(&bundle->channel_lock);
-
- if (call->state != RXRPC_CALL_CLIENT_AWAIT_CONN) {
- ret = 0;
- goto granted_channel;
+ if (rxrpc_bundle_has_space(bundle))
+ rxrpc_activate_channels(bundle);
}
-
- trace_rxrpc_client(call->conn, ret, rxrpc_client_chan_wait_failed);
- rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret);
- rxrpc_disconnect_client_call(bundle, call);
- goto out_put_bundle;
}
/*
@@ -794,14 +543,14 @@ void rxrpc_expose_client_call(struct rxrpc_call *call)
/*
* Set the reap timer.
*/
-static void rxrpc_set_client_reap_timer(struct rxrpc_net *rxnet)
+static void rxrpc_set_client_reap_timer(struct rxrpc_local *local)
{
- if (!rxnet->kill_all_client_conns) {
+ if (!local->kill_all_client_conns) {
unsigned long now = jiffies;
unsigned long reap_at = now + rxrpc_conn_idle_client_expiry;
- if (rxnet->live)
- timer_reduce(&rxnet->client_conn_reap_timer, reap_at);
+ if (local->rxnet->live)
+ timer_reduce(&local->client_conn_reap_timer, reap_at);
}
}
@@ -812,16 +561,13 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
{
struct rxrpc_connection *conn;
struct rxrpc_channel *chan = NULL;
- struct rxrpc_net *rxnet = bundle->local->rxnet;
+ struct rxrpc_local *local = bundle->local;
unsigned int channel;
bool may_reuse;
u32 cid;
_enter("c=%x", call->debug_id);
- spin_lock(&bundle->channel_lock);
- set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
-
/* Calls that have never actually been assigned a channel can simply be
* discarded.
*/
@@ -830,8 +576,8 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
_debug("call is waiting");
ASSERTCMP(call->call_id, ==, 0);
ASSERT(!test_bit(RXRPC_CALL_EXPOSED, &call->flags));
- list_del_init(&call->chan_wait_link);
- goto out;
+ list_del_init(&call->wait_link);
+ return;
}
cid = call->cid;
@@ -839,10 +585,8 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
chan = &conn->channels[channel];
trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect);
- if (rcu_access_pointer(chan->call) != call) {
- spin_unlock(&bundle->channel_lock);
- BUG();
- }
+ if (WARN_ON(chan->call != call))
+ return;
may_reuse = rxrpc_may_reuse_conn(conn);
@@ -863,16 +607,15 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
trace_rxrpc_client(conn, channel, rxrpc_client_to_active);
bundle->try_upgrade = false;
if (may_reuse)
- rxrpc_activate_channels_locked(bundle);
+ rxrpc_activate_channels(bundle);
}
-
}
/* See if we can pass the channel directly to another call. */
if (may_reuse && !list_empty(&bundle->waiting_calls)) {
trace_rxrpc_client(conn, channel, rxrpc_client_chan_pass);
rxrpc_activate_one_channel(conn, channel);
- goto out;
+ return;
}
/* Schedule the final ACK to be transmitted in a short while so that it
@@ -890,7 +633,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
}
/* Deactivate the channel. */
- rcu_assign_pointer(chan->call, NULL);
+ chan->call = NULL;
set_bit(conn->bundle_shift + channel, &conn->bundle->avail_chans);
conn->act_chans &= ~(1 << channel);
@@ -903,17 +646,10 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
conn->idle_timestamp = jiffies;
rxrpc_get_connection(conn, rxrpc_conn_get_idle);
- spin_lock(&rxnet->client_conn_cache_lock);
- list_move_tail(&conn->cache_link, &rxnet->idle_client_conns);
- spin_unlock(&rxnet->client_conn_cache_lock);
+ list_move_tail(&conn->cache_link, &local->idle_client_conns);
- rxrpc_set_client_reap_timer(rxnet);
+ rxrpc_set_client_reap_timer(local);
}
-
-out:
- spin_unlock(&bundle->channel_lock);
- _leave("");
- return;
}
/*
@@ -923,7 +659,6 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn)
{
struct rxrpc_bundle *bundle = conn->bundle;
unsigned int bindex;
- bool need_drop = false;
int i;
_enter("C=%x", conn->debug_id);
@@ -931,18 +666,13 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn)
if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK)
rxrpc_process_delayed_final_acks(conn, true);
- spin_lock(&bundle->channel_lock);
bindex = conn->bundle_shift / RXRPC_MAXCALLS;
if (bundle->conns[bindex] == conn) {
_debug("clear slot %u", bindex);
bundle->conns[bindex] = NULL;
for (i = 0; i < RXRPC_MAXCALLS; i++)
clear_bit(conn->bundle_shift + i, &bundle->avail_chans);
- need_drop = true;
- }
- spin_unlock(&bundle->channel_lock);
-
- if (need_drop) {
+ rxrpc_put_client_connection_id(bundle->local, conn);
rxrpc_deactivate_bundle(bundle);
rxrpc_put_connection(conn, rxrpc_conn_put_unbundle);
}
@@ -951,11 +681,15 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn)
/*
* Drop the active count on a bundle.
*/
-static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle)
+void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle)
{
- struct rxrpc_local *local = bundle->local;
+ struct rxrpc_local *local;
bool need_put = false;
+ if (!bundle)
+ return;
+
+ local = bundle->local;
if (atomic_dec_and_lock(&bundle->active, &local->client_bundles_lock)) {
if (!bundle->exclusive) {
_debug("erase bundle");
@@ -982,7 +716,7 @@ void rxrpc_kill_client_conn(struct rxrpc_connection *conn)
trace_rxrpc_client(conn, -1, rxrpc_client_cleanup);
atomic_dec(&rxnet->nr_client_conns);
- rxrpc_put_client_connection_id(conn);
+ rxrpc_put_client_connection_id(local, conn);
}
/*
@@ -992,42 +726,26 @@ void rxrpc_kill_client_conn(struct rxrpc_connection *conn)
* This may be called from conn setup or from a work item so cannot be
* considered non-reentrant.
*/
-void rxrpc_discard_expired_client_conns(struct work_struct *work)
+void rxrpc_discard_expired_client_conns(struct rxrpc_local *local)
{
struct rxrpc_connection *conn;
- struct rxrpc_net *rxnet =
- container_of(work, struct rxrpc_net, client_conn_reaper);
unsigned long expiry, conn_expires_at, now;
unsigned int nr_conns;
_enter("");
- if (list_empty(&rxnet->idle_client_conns)) {
- _leave(" [empty]");
- return;
- }
-
- /* Don't double up on the discarding */
- if (!mutex_trylock(&rxnet->client_conn_discard_lock)) {
- _leave(" [already]");
- return;
- }
-
/* We keep an estimate of what the number of conns ought to be after
* we've discarded some so that we don't overdo the discarding.
*/
- nr_conns = atomic_read(&rxnet->nr_client_conns);
+ nr_conns = atomic_read(&local->rxnet->nr_client_conns);
next:
- spin_lock(&rxnet->client_conn_cache_lock);
-
- if (list_empty(&rxnet->idle_client_conns))
- goto out;
-
- conn = list_entry(rxnet->idle_client_conns.next,
- struct rxrpc_connection, cache_link);
+ conn = list_first_entry_or_null(&local->idle_client_conns,
+ struct rxrpc_connection, cache_link);
+ if (!conn)
+ return;
- if (!rxnet->kill_all_client_conns) {
+ if (!local->kill_all_client_conns) {
/* If the number of connections is over the reap limit, we
* expedite discard by reducing the expiry timeout. We must,
* however, have at least a short grace period to be able to do
@@ -1050,8 +768,6 @@ next:
trace_rxrpc_client(conn, -1, rxrpc_client_discard);
list_del_init(&conn->cache_link);
- spin_unlock(&rxnet->client_conn_cache_lock);
-
rxrpc_unbundle_conn(conn);
/* Drop the ->cache_link ref */
rxrpc_put_connection(conn, rxrpc_conn_put_discard_idle);
@@ -1068,31 +784,8 @@ not_yet_expired:
* then things get messier.
*/
_debug("not yet");
- if (!rxnet->kill_all_client_conns)
- timer_reduce(&rxnet->client_conn_reap_timer, conn_expires_at);
-
-out:
- spin_unlock(&rxnet->client_conn_cache_lock);
- mutex_unlock(&rxnet->client_conn_discard_lock);
- _leave("");
-}
-
-/*
- * Preemptively destroy all the client connection records rather than waiting
- * for them to time out
- */
-void rxrpc_destroy_all_client_connections(struct rxrpc_net *rxnet)
-{
- _enter("");
-
- spin_lock(&rxnet->client_conn_cache_lock);
- rxnet->kill_all_client_conns = true;
- spin_unlock(&rxnet->client_conn_cache_lock);
-
- del_timer_sync(&rxnet->client_conn_reap_timer);
-
- if (!rxrpc_queue_work(&rxnet->client_conn_reaper))
- _debug("destroy: queue failed");
+ if (!local->kill_all_client_conns)
+ timer_reduce(&local->client_conn_reap_timer, conn_expires_at);
_leave("");
}
@@ -1102,29 +795,19 @@ void rxrpc_destroy_all_client_connections(struct rxrpc_net *rxnet)
*/
void rxrpc_clean_up_local_conns(struct rxrpc_local *local)
{
- struct rxrpc_connection *conn, *tmp;
- struct rxrpc_net *rxnet = local->rxnet;
- LIST_HEAD(graveyard);
+ struct rxrpc_connection *conn;
_enter("");
- spin_lock(&rxnet->client_conn_cache_lock);
-
- list_for_each_entry_safe(conn, tmp, &rxnet->idle_client_conns,
- cache_link) {
- if (conn->local == local) {
- atomic_dec(&conn->active);
- trace_rxrpc_client(conn, -1, rxrpc_client_discard);
- list_move(&conn->cache_link, &graveyard);
- }
- }
+ local->kill_all_client_conns = true;
- spin_unlock(&rxnet->client_conn_cache_lock);
+ del_timer_sync(&local->client_conn_reap_timer);
- while (!list_empty(&graveyard)) {
- conn = list_entry(graveyard.next,
- struct rxrpc_connection, cache_link);
+ while ((conn = list_first_entry_or_null(&local->idle_client_conns,
+ struct rxrpc_connection, cache_link))) {
list_del_init(&conn->cache_link);
+ atomic_dec(&conn->active);
+ trace_rxrpc_client(conn, -1, rxrpc_client_discard);
rxrpc_unbundle_conn(conn);
rxrpc_put_connection(conn, rxrpc_conn_put_local_dead);
}
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 480364bcbf85..44414e724415 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -17,11 +17,65 @@
#include "ar-internal.h"
/*
+ * Set the completion state on an aborted connection.
+ */
+static bool rxrpc_set_conn_aborted(struct rxrpc_connection *conn, struct sk_buff *skb,
+ s32 abort_code, int err,
+ enum rxrpc_call_completion compl)
+{
+ bool aborted = false;
+
+ if (conn->state != RXRPC_CONN_ABORTED) {
+ spin_lock(&conn->state_lock);
+ if (conn->state != RXRPC_CONN_ABORTED) {
+ conn->abort_code = abort_code;
+ conn->error = err;
+ conn->completion = compl;
+ /* Order the abort info before the state change. */
+ smp_store_release(&conn->state, RXRPC_CONN_ABORTED);
+ set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
+ set_bit(RXRPC_CONN_EV_ABORT_CALLS, &conn->events);
+ aborted = true;
+ }
+ spin_unlock(&conn->state_lock);
+ }
+
+ return aborted;
+}
+
+/*
+ * Mark a socket buffer to indicate that the connection it's on should be aborted.
+ */
+int rxrpc_abort_conn(struct rxrpc_connection *conn, struct sk_buff *skb,
+ s32 abort_code, int err, enum rxrpc_abort_reason why)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ if (rxrpc_set_conn_aborted(conn, skb, abort_code, err,
+ RXRPC_CALL_LOCALLY_ABORTED)) {
+ trace_rxrpc_abort(0, why, sp->hdr.cid, sp->hdr.callNumber,
+ sp->hdr.seq, abort_code, err);
+ rxrpc_poke_conn(conn, rxrpc_conn_get_poke_abort);
+ }
+ return -EPROTO;
+}
+
+/*
+ * Mark a connection as being remotely aborted.
+ */
+static bool rxrpc_input_conn_abort(struct rxrpc_connection *conn,
+ struct sk_buff *skb)
+{
+ return rxrpc_set_conn_aborted(conn, skb, skb->priority, -ECONNABORTED,
+ RXRPC_CALL_REMOTELY_ABORTED);
+}
+
+/*
* Retransmit terminal ACK or ABORT of the previous call.
*/
-static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
- struct sk_buff *skb,
- unsigned int channel)
+void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
+ struct sk_buff *skb,
+ unsigned int channel)
{
struct rxrpc_skb_priv *sp = skb ? rxrpc_skb(skb) : NULL;
struct rxrpc_channel *chan;
@@ -46,9 +100,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
/* If the last call got moved on whilst we were waiting to run, just
* ignore this packet.
*/
- call_id = READ_ONCE(chan->last_call);
- /* Sync with __rxrpc_disconnect_call() */
- smp_rmb();
+ call_id = chan->last_call;
if (skb && call_id != sp->hdr.callNumber)
return;
@@ -65,9 +117,12 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
iov[2].iov_base = &ack_info;
iov[2].iov_len = sizeof(ack_info);
+ serial = atomic_inc_return(&conn->serial);
+
pkt.whdr.epoch = htonl(conn->proto.epoch);
pkt.whdr.cid = htonl(conn->proto.cid | channel);
pkt.whdr.callNumber = htonl(call_id);
+ pkt.whdr.serial = htonl(serial);
pkt.whdr.seq = 0;
pkt.whdr.type = chan->last_type;
pkt.whdr.flags = conn->out_clientflag;
@@ -104,31 +159,15 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
iov[0].iov_len += sizeof(pkt.ack);
len += sizeof(pkt.ack) + 3 + sizeof(ack_info);
ioc = 3;
- break;
-
- default:
- return;
- }
-
- /* Resync with __rxrpc_disconnect_call() and check that the last call
- * didn't get advanced whilst we were filling out the packets.
- */
- smp_rmb();
- if (READ_ONCE(chan->last_call) != call_id)
- return;
-
- serial = atomic_inc_return(&conn->serial);
- pkt.whdr.serial = htonl(serial);
- switch (chan->last_type) {
- case RXRPC_PACKET_TYPE_ABORT:
- break;
- case RXRPC_PACKET_TYPE_ACK:
trace_rxrpc_tx_ack(chan->call_debug_id, serial,
ntohl(pkt.ack.firstPacket),
ntohl(pkt.ack.serial),
pkt.ack.reason, 0);
break;
+
+ default:
+ return;
}
ret = kernel_sendmsg(conn->local->socket, &msg, iov, ioc, len);
@@ -146,131 +185,34 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
/*
* pass a connection-level abort onto all calls on that connection
*/
-static void rxrpc_abort_calls(struct rxrpc_connection *conn,
- enum rxrpc_call_completion compl,
- rxrpc_serial_t serial)
+static void rxrpc_abort_calls(struct rxrpc_connection *conn)
{
struct rxrpc_call *call;
int i;
_enter("{%d},%x", conn->debug_id, conn->abort_code);
- spin_lock(&conn->bundle->channel_lock);
-
for (i = 0; i < RXRPC_MAXCALLS; i++) {
- call = rcu_dereference_protected(
- conn->channels[i].call,
- lockdep_is_held(&conn->bundle->channel_lock));
- if (call) {
- if (compl == RXRPC_CALL_LOCALLY_ABORTED)
- trace_rxrpc_abort(call->debug_id,
- "CON", call->cid,
- call->call_id, 0,
+ call = conn->channels[i].call;
+ if (call)
+ rxrpc_set_call_completion(call,
+ conn->completion,
conn->abort_code,
conn->error);
- else
- trace_rxrpc_rx_abort(call, serial,
- conn->abort_code);
- rxrpc_set_call_completion(call, compl,
- conn->abort_code,
- conn->error);
- }
}
- spin_unlock(&conn->bundle->channel_lock);
_leave("");
}
/*
- * generate a connection-level abort
- */
-static int rxrpc_abort_connection(struct rxrpc_connection *conn,
- int error, u32 abort_code)
-{
- struct rxrpc_wire_header whdr;
- struct msghdr msg;
- struct kvec iov[2];
- __be32 word;
- size_t len;
- u32 serial;
- int ret;
-
- _enter("%d,,%u,%u", conn->debug_id, error, abort_code);
-
- /* generate a connection-level abort */
- spin_lock(&conn->state_lock);
- if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
- spin_unlock(&conn->state_lock);
- _leave(" = 0 [already dead]");
- return 0;
- }
-
- conn->error = error;
- conn->abort_code = abort_code;
- conn->state = RXRPC_CONN_LOCALLY_ABORTED;
- set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
- spin_unlock(&conn->state_lock);
-
- msg.msg_name = &conn->peer->srx.transport;
- msg.msg_namelen = conn->peer->srx.transport_len;
- msg.msg_control = NULL;
- msg.msg_controllen = 0;
- msg.msg_flags = 0;
-
- whdr.epoch = htonl(conn->proto.epoch);
- whdr.cid = htonl(conn->proto.cid);
- whdr.callNumber = 0;
- whdr.seq = 0;
- whdr.type = RXRPC_PACKET_TYPE_ABORT;
- whdr.flags = conn->out_clientflag;
- whdr.userStatus = 0;
- whdr.securityIndex = conn->security_ix;
- whdr._rsvd = 0;
- whdr.serviceId = htons(conn->service_id);
-
- word = htonl(conn->abort_code);
-
- iov[0].iov_base = &whdr;
- iov[0].iov_len = sizeof(whdr);
- iov[1].iov_base = &word;
- iov[1].iov_len = sizeof(word);
-
- len = iov[0].iov_len + iov[1].iov_len;
-
- serial = atomic_inc_return(&conn->serial);
- rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, serial);
- whdr.serial = htonl(serial);
-
- ret = kernel_sendmsg(conn->local->socket, &msg, iov, 2, len);
- if (ret < 0) {
- trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
- rxrpc_tx_point_conn_abort);
- _debug("sendmsg failed: %d", ret);
- return -EAGAIN;
- }
-
- trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_conn_abort);
-
- conn->peer->last_tx_at = ktime_get_seconds();
-
- _leave(" = 0");
- return 0;
-}
-
-/*
* mark a call as being on a now-secured channel
* - must be called with BH's disabled.
*/
static void rxrpc_call_is_secure(struct rxrpc_call *call)
{
- _enter("%p", call);
- if (call) {
- write_lock(&call->state_lock);
- if (call->state == RXRPC_CALL_SERVER_SECURING) {
- call->state = RXRPC_CALL_SERVER_RECV_REQUEST;
- rxrpc_notify_socket(call);
- }
- write_unlock(&call->state_lock);
+ if (call && __rxrpc_call_state(call) == RXRPC_CALL_SERVER_SECURING) {
+ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST);
+ rxrpc_notify_socket(call);
}
}
@@ -278,44 +220,22 @@ static void rxrpc_call_is_secure(struct rxrpc_call *call)
* connection-level Rx packet processor
*/
static int rxrpc_process_event(struct rxrpc_connection *conn,
- struct sk_buff *skb,
- u32 *_abort_code)
+ struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- int loop, ret;
+ int ret;
- if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
- _leave(" = -ECONNABORTED [%u]", conn->state);
+ if (conn->state == RXRPC_CONN_ABORTED)
return -ECONNABORTED;
- }
_enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial);
switch (sp->hdr.type) {
- case RXRPC_PACKET_TYPE_DATA:
- case RXRPC_PACKET_TYPE_ACK:
- rxrpc_conn_retransmit_call(conn, skb,
- sp->hdr.cid & RXRPC_CHANNELMASK);
- return 0;
-
- case RXRPC_PACKET_TYPE_BUSY:
- /* Just ignore BUSY packets for now. */
- return 0;
-
- case RXRPC_PACKET_TYPE_ABORT:
- conn->error = -ECONNABORTED;
- conn->abort_code = skb->priority;
- conn->state = RXRPC_CONN_REMOTELY_ABORTED;
- set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
- rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, sp->hdr.serial);
- return -ECONNABORTED;
-
case RXRPC_PACKET_TYPE_CHALLENGE:
- return conn->security->respond_to_challenge(conn, skb,
- _abort_code);
+ return conn->security->respond_to_challenge(conn, skb);
case RXRPC_PACKET_TYPE_RESPONSE:
- ret = conn->security->verify_response(conn, skb, _abort_code);
+ ret = conn->security->verify_response(conn, skb);
if (ret < 0)
return ret;
@@ -324,27 +244,25 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
if (ret < 0)
return ret;
- spin_lock(&conn->bundle->channel_lock);
spin_lock(&conn->state_lock);
-
- if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) {
+ if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING)
conn->state = RXRPC_CONN_SERVICE;
- spin_unlock(&conn->state_lock);
- for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
- rxrpc_call_is_secure(
- rcu_dereference_protected(
- conn->channels[loop].call,
- lockdep_is_held(&conn->bundle->channel_lock)));
- } else {
- spin_unlock(&conn->state_lock);
- }
+ spin_unlock(&conn->state_lock);
- spin_unlock(&conn->bundle->channel_lock);
+ if (conn->state == RXRPC_CONN_SERVICE) {
+ /* Offload call state flipping to the I/O thread. As
+ * we've already received the packet, put it on the
+ * front of the queue.
+ */
+ skb->mark = RXRPC_SKB_MARK_SERVICE_CONN_SECURED;
+ rxrpc_get_skb(skb, rxrpc_skb_get_conn_secured);
+ skb_queue_head(&conn->local->rx_queue, skb);
+ rxrpc_wake_up_io_thread(conn->local);
+ }
return 0;
default:
- trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
- tracepoint_string("bad_conn_pkt"));
+ WARN_ON_ONCE(1);
return -EPROTO;
}
}
@@ -354,26 +272,9 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
*/
static void rxrpc_secure_connection(struct rxrpc_connection *conn)
{
- u32 abort_code;
- int ret;
-
- _enter("{%d}", conn->debug_id);
-
- ASSERT(conn->security_ix != 0);
-
- if (conn->security->issue_challenge(conn) < 0) {
- abort_code = RX_CALL_DEAD;
- ret = -ENOMEM;
- goto abort;
- }
-
- _leave("");
- return;
-
-abort:
- _debug("abort %d, %d", ret, abort_code);
- rxrpc_abort_connection(conn, ret, abort_code);
- _leave(" [aborted]");
+ if (conn->security->issue_challenge(conn) < 0)
+ rxrpc_abort_conn(conn, NULL, RX_CALL_DEAD, -ENOMEM,
+ rxrpc_abort_nomem);
}
/*
@@ -395,9 +296,7 @@ again:
if (!test_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags))
continue;
- smp_rmb(); /* vs rxrpc_disconnect_client_call */
- ack_at = READ_ONCE(chan->final_ack_at);
-
+ ack_at = chan->final_ack_at;
if (time_before(j, ack_at) && !force) {
if (time_before(ack_at, next_j)) {
next_j = ack_at;
@@ -424,47 +323,27 @@ again:
static void rxrpc_do_process_connection(struct rxrpc_connection *conn)
{
struct sk_buff *skb;
- u32 abort_code = RX_PROTOCOL_ERROR;
int ret;
if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events))
rxrpc_secure_connection(conn);
- /* Process delayed ACKs whose time has come. */
- if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK)
- rxrpc_process_delayed_final_acks(conn, false);
-
/* go through the conn-level event packets, releasing the ref on this
* connection that each one has when we've finished with it */
while ((skb = skb_dequeue(&conn->rx_queue))) {
rxrpc_see_skb(skb, rxrpc_skb_see_conn_work);
- ret = rxrpc_process_event(conn, skb, &abort_code);
+ ret = rxrpc_process_event(conn, skb);
switch (ret) {
- case -EPROTO:
- case -EKEYEXPIRED:
- case -EKEYREJECTED:
- goto protocol_error;
case -ENOMEM:
case -EAGAIN:
- goto requeue_and_leave;
- case -ECONNABORTED:
+ skb_queue_head(&conn->rx_queue, skb);
+ rxrpc_queue_conn(conn, rxrpc_conn_queue_retry_work);
+ break;
default:
rxrpc_free_skb(skb, rxrpc_skb_put_conn_work);
break;
}
}
-
- return;
-
-requeue_and_leave:
- skb_queue_head(&conn->rx_queue, skb);
- return;
-
-protocol_error:
- if (rxrpc_abort_connection(conn, ret, abort_code) < 0)
- goto requeue_and_leave;
- rxrpc_free_skb(skb, rxrpc_skb_put_conn_work);
- return;
}
void rxrpc_process_connection(struct work_struct *work)
@@ -498,44 +377,59 @@ static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
/*
* Input a connection-level packet.
*/
-int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb)
+bool rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
- _leave(" = -ECONNABORTED [%u]", conn->state);
- return -ECONNABORTED;
- }
-
- _enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial);
-
switch (sp->hdr.type) {
- case RXRPC_PACKET_TYPE_DATA:
- case RXRPC_PACKET_TYPE_ACK:
- rxrpc_conn_retransmit_call(conn, skb,
- sp->hdr.cid & RXRPC_CHANNELMASK);
- return 0;
-
case RXRPC_PACKET_TYPE_BUSY:
/* Just ignore BUSY packets for now. */
- return 0;
+ return true;
case RXRPC_PACKET_TYPE_ABORT:
- conn->error = -ECONNABORTED;
- conn->abort_code = skb->priority;
- conn->state = RXRPC_CONN_REMOTELY_ABORTED;
- set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
- rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, sp->hdr.serial);
- return -ECONNABORTED;
+ if (rxrpc_is_conn_aborted(conn))
+ return true;
+ rxrpc_input_conn_abort(conn, skb);
+ rxrpc_abort_calls(conn);
+ return true;
case RXRPC_PACKET_TYPE_CHALLENGE:
case RXRPC_PACKET_TYPE_RESPONSE:
+ if (rxrpc_is_conn_aborted(conn)) {
+ if (conn->completion == RXRPC_CALL_LOCALLY_ABORTED)
+ rxrpc_send_conn_abort(conn);
+ return true;
+ }
rxrpc_post_packet_to_conn(conn, skb);
- return 0;
+ return true;
default:
- trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
- tracepoint_string("bad_conn_pkt"));
- return -EPROTO;
+ WARN_ON_ONCE(1);
+ return true;
}
}
+
+/*
+ * Input a connection event.
+ */
+void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb)
+{
+ unsigned int loop;
+
+ if (test_and_clear_bit(RXRPC_CONN_EV_ABORT_CALLS, &conn->events))
+ rxrpc_abort_calls(conn);
+
+ switch (skb->mark) {
+ case RXRPC_SKB_MARK_SERVICE_CONN_SECURED:
+ if (conn->state != RXRPC_CONN_SERVICE)
+ break;
+
+ for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
+ rxrpc_call_is_secure(conn->channels[loop].call);
+ break;
+ }
+
+ /* Process delayed ACKs whose time has come. */
+ if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK)
+ rxrpc_process_delayed_final_acks(conn, false);
+}
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 3c8f83dacb2b..ac85d4644a3c 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -23,12 +23,30 @@ static void rxrpc_clean_up_connection(struct work_struct *work);
static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet,
unsigned long reap_at);
+void rxrpc_poke_conn(struct rxrpc_connection *conn, enum rxrpc_conn_trace why)
+{
+ struct rxrpc_local *local = conn->local;
+ bool busy;
+
+ if (WARN_ON_ONCE(!local))
+ return;
+
+ spin_lock_bh(&local->lock);
+ busy = !list_empty(&conn->attend_link);
+ if (!busy) {
+ rxrpc_get_connection(conn, why);
+ list_add_tail(&conn->attend_link, &local->conn_attend_q);
+ }
+ spin_unlock_bh(&local->lock);
+ rxrpc_wake_up_io_thread(local);
+}
+
static void rxrpc_connection_timer(struct timer_list *timer)
{
struct rxrpc_connection *conn =
container_of(timer, struct rxrpc_connection, timer);
- rxrpc_queue_conn(conn, rxrpc_conn_queue_timer);
+ rxrpc_poke_conn(conn, rxrpc_conn_get_poke_timer);
}
/*
@@ -49,6 +67,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *rxnet,
INIT_WORK(&conn->destructor, rxrpc_clean_up_connection);
INIT_LIST_HEAD(&conn->proc_link);
INIT_LIST_HEAD(&conn->link);
+ mutex_init(&conn->security_lock);
skb_queue_head_init(&conn->rx_queue);
conn->rxnet = rxnet;
conn->security = &rxrpc_no_security;
@@ -82,10 +101,10 @@ struct rxrpc_connection *rxrpc_find_client_connection_rcu(struct rxrpc_local *lo
_enter(",%x", sp->hdr.cid & RXRPC_CIDMASK);
- /* Look up client connections by connection ID alone as their IDs are
- * unique for this machine.
+ /* Look up client connections by connection ID alone as their
+ * IDs are unique for this machine.
*/
- conn = idr_find(&rxrpc_client_conn_ids, sp->hdr.cid >> RXRPC_CIDSHIFT);
+ conn = idr_find(&local->conn_ids, sp->hdr.cid >> RXRPC_CIDSHIFT);
if (!conn || refcount_read(&conn->ref) == 0) {
_debug("no conn");
goto not_found;
@@ -139,7 +158,7 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn,
_enter("%d,%x", conn->debug_id, call->cid);
- if (rcu_access_pointer(chan->call) == call) {
+ if (chan->call == call) {
/* Save the result of the call so that we can repeat it if necessary
* through the channel, whilst disposing of the actual call record.
*/
@@ -159,12 +178,9 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn,
break;
}
- /* Sync with rxrpc_conn_retransmit(). */
- smp_wmb();
chan->last_call = chan->call_id;
chan->call_id = chan->call_counter;
-
- rcu_assign_pointer(chan->call, NULL);
+ chan->call = NULL;
}
_leave("");
@@ -178,6 +194,9 @@ void rxrpc_disconnect_call(struct rxrpc_call *call)
{
struct rxrpc_connection *conn = call->conn;
+ set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
+ rxrpc_see_call(call, rxrpc_call_see_disconnected);
+
call->peer->cong_ssthresh = call->cong_ssthresh;
if (!hlist_unhashed(&call->error_link)) {
@@ -186,18 +205,17 @@ void rxrpc_disconnect_call(struct rxrpc_call *call)
spin_unlock(&call->peer->lock);
}
- if (rxrpc_is_client_call(call))
- return rxrpc_disconnect_client_call(conn->bundle, call);
-
- spin_lock(&conn->bundle->channel_lock);
- __rxrpc_disconnect_call(conn, call);
- spin_unlock(&conn->bundle->channel_lock);
+ if (rxrpc_is_client_call(call)) {
+ rxrpc_disconnect_client_call(call->bundle, call);
+ } else {
+ __rxrpc_disconnect_call(conn, call);
+ conn->idle_timestamp = jiffies;
+ if (atomic_dec_and_test(&conn->active))
+ rxrpc_set_service_reap_timer(conn->rxnet,
+ jiffies + rxrpc_connection_expiry);
+ }
- set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
- conn->idle_timestamp = jiffies;
- if (atomic_dec_and_test(&conn->active))
- rxrpc_set_service_reap_timer(conn->rxnet,
- jiffies + rxrpc_connection_expiry);
+ rxrpc_put_call(call, rxrpc_call_put_io_thread);
}
/*
@@ -293,10 +311,10 @@ static void rxrpc_clean_up_connection(struct work_struct *work)
container_of(work, struct rxrpc_connection, destructor);
struct rxrpc_net *rxnet = conn->rxnet;
- ASSERT(!rcu_access_pointer(conn->channels[0].call) &&
- !rcu_access_pointer(conn->channels[1].call) &&
- !rcu_access_pointer(conn->channels[2].call) &&
- !rcu_access_pointer(conn->channels[3].call));
+ ASSERT(!conn->channels[0].call &&
+ !conn->channels[1].call &&
+ !conn->channels[2].call &&
+ !conn->channels[3].call);
ASSERT(list_empty(&conn->cache_link));
del_timer_sync(&conn->timer);
@@ -447,7 +465,6 @@ void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet)
_enter("");
atomic_dec(&rxnet->nr_conns);
- rxrpc_destroy_all_client_connections(rxnet);
del_timer_sync(&rxnet->service_conn_reap_timer);
rxrpc_queue_work(&rxnet->service_conn_reaper);
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
index 2a55a88b2a5b..f30323de82bd 100644
--- a/net/rxrpc/conn_service.c
+++ b/net/rxrpc/conn_service.c
@@ -11,7 +11,6 @@
static struct rxrpc_bundle rxrpc_service_dummy_bundle = {
.ref = REFCOUNT_INIT(1),
.debug_id = UINT_MAX,
- .channel_lock = __SPIN_LOCK_UNLOCKED(&rxrpc_service_dummy_bundle.channel_lock),
};
/*
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index d0e20e946e48..367927a99881 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -9,11 +9,10 @@
#include "ar-internal.h"
-static void rxrpc_proto_abort(const char *why,
- struct rxrpc_call *call, rxrpc_seq_t seq)
+static void rxrpc_proto_abort(struct rxrpc_call *call, rxrpc_seq_t seq,
+ enum rxrpc_abort_reason why)
{
- if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, -EBADMSG))
- rxrpc_send_abort_packet(call);
+ rxrpc_abort_call(call, seq, RX_PROTOCOL_ERROR, -EBADMSG, why);
}
/*
@@ -185,7 +184,7 @@ void rxrpc_congestion_degrade(struct rxrpc_call *call)
if (call->cong_mode != RXRPC_CALL_SLOW_START &&
call->cong_mode != RXRPC_CALL_CONGEST_AVOIDANCE)
return;
- if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY)
+ if (__rxrpc_call_state(call) == RXRPC_CALL_CLIENT_AWAIT_REPLY)
return;
rtt = ns_to_ktime(call->peer->srtt_us * (1000 / 8));
@@ -250,47 +249,34 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
* This occurs when we get an ACKALL packet, the first DATA packet of a reply,
* or a final ACK packet.
*/
-static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
- const char *abort_why)
+static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
+ enum rxrpc_abort_reason abort_why)
{
- unsigned int state;
-
ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags));
- write_lock(&call->state_lock);
-
- state = call->state;
- switch (state) {
+ switch (__rxrpc_call_state(call)) {
case RXRPC_CALL_CLIENT_SEND_REQUEST:
case RXRPC_CALL_CLIENT_AWAIT_REPLY:
- if (reply_begun)
- call->state = state = RXRPC_CALL_CLIENT_RECV_REPLY;
- else
- call->state = state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
+ if (reply_begun) {
+ rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_RECV_REPLY);
+ trace_rxrpc_txqueue(call, rxrpc_txqueue_end);
+ break;
+ }
+
+ rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_AWAIT_REPLY);
+ trace_rxrpc_txqueue(call, rxrpc_txqueue_await_reply);
break;
case RXRPC_CALL_SERVER_AWAIT_ACK:
- __rxrpc_call_completed(call);
- state = call->state;
+ rxrpc_call_completed(call);
+ trace_rxrpc_txqueue(call, rxrpc_txqueue_end);
break;
default:
- goto bad_state;
+ kdebug("end_tx %s", rxrpc_call_states[__rxrpc_call_state(call)]);
+ rxrpc_proto_abort(call, call->tx_top, abort_why);
+ break;
}
-
- write_unlock(&call->state_lock);
- if (state == RXRPC_CALL_CLIENT_AWAIT_REPLY)
- trace_rxrpc_txqueue(call, rxrpc_txqueue_await_reply);
- else
- trace_rxrpc_txqueue(call, rxrpc_txqueue_end);
- _leave(" = ok");
- return true;
-
-bad_state:
- write_unlock(&call->state_lock);
- kdebug("end_tx %s", rxrpc_call_states[call->state]);
- rxrpc_proto_abort(abort_why, call, call->tx_top);
- return false;
}
/*
@@ -305,18 +291,48 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call)
if (call->ackr_reason) {
now = jiffies;
timo = now + MAX_JIFFY_OFFSET;
- WRITE_ONCE(call->resend_at, timo);
+
WRITE_ONCE(call->delay_ack_at, timo);
trace_rxrpc_timer(call, rxrpc_timer_init_for_reply, now);
}
if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) {
if (!rxrpc_rotate_tx_window(call, top, &summary)) {
- rxrpc_proto_abort("TXL", call, top);
+ rxrpc_proto_abort(call, top, rxrpc_eproto_early_reply);
return false;
}
}
- return rxrpc_end_tx_phase(call, true, "ETD");
+
+ rxrpc_end_tx_phase(call, true, rxrpc_eproto_unexpected_reply);
+ return true;
+}
+
+/*
+ * End the packet reception phase.
+ */
+static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
+{
+ rxrpc_seq_t whigh = READ_ONCE(call->rx_highest_seq);
+
+ _enter("%d,%s", call->debug_id, rxrpc_call_states[__rxrpc_call_state(call)]);
+
+ trace_rxrpc_receive(call, rxrpc_receive_end, 0, whigh);
+
+ switch (__rxrpc_call_state(call)) {
+ case RXRPC_CALL_CLIENT_RECV_REPLY:
+ rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_terminal_ack);
+ rxrpc_call_completed(call);
+ break;
+
+ case RXRPC_CALL_SERVER_RECV_REQUEST:
+ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_ACK_REQUEST);
+ call->expect_req_by = jiffies + MAX_JIFFY_OFFSET;
+ rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_processing_op);
+ break;
+
+ default:
+ break;
+ }
}
static void rxrpc_input_update_ack_window(struct rxrpc_call *call,
@@ -337,8 +353,9 @@ static void rxrpc_input_queue_data(struct rxrpc_call *call, struct sk_buff *skb,
__skb_queue_tail(&call->recvmsg_queue, skb);
rxrpc_input_update_ack_window(call, window, wtop);
-
trace_rxrpc_receive(call, last ? why + 1 : why, sp->hdr.serial, sp->hdr.seq);
+ if (last)
+ rxrpc_end_rx_phase(call, sp->hdr.serial);
}
/*
@@ -366,17 +383,14 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
if (last) {
if (test_and_set_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
- seq + 1 != wtop) {
- rxrpc_proto_abort("LSN", call, seq);
- return;
- }
+ seq + 1 != wtop)
+ return rxrpc_proto_abort(call, seq, rxrpc_eproto_different_last);
} else {
if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
after_eq(seq, wtop)) {
pr_warn("Packet beyond last: c=%x q=%x window=%x-%x wlimit=%x\n",
call->debug_id, seq, window, wtop, wlimit);
- rxrpc_proto_abort("LSA", call, seq);
- return;
+ return rxrpc_proto_abort(call, seq, rxrpc_eproto_data_after_last);
}
}
@@ -550,7 +564,6 @@ protocol_error:
static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- enum rxrpc_call_state state;
rxrpc_serial_t serial = sp->hdr.serial;
rxrpc_seq_t seq0 = sp->hdr.seq;
@@ -558,11 +571,20 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
atomic64_read(&call->ackr_window), call->rx_highest_seq,
skb->len, seq0);
- state = READ_ONCE(call->state);
- if (state >= RXRPC_CALL_COMPLETE)
+ if (__rxrpc_call_is_complete(call))
return;
- if (state == RXRPC_CALL_SERVER_RECV_REQUEST) {
+ switch (__rxrpc_call_state(call)) {
+ case RXRPC_CALL_CLIENT_SEND_REQUEST:
+ case RXRPC_CALL_CLIENT_AWAIT_REPLY:
+ /* Received data implicitly ACKs all of the request
+ * packets we sent when we're acting as a client.
+ */
+ if (!rxrpc_receiving_reply(call))
+ goto out_notify;
+ break;
+
+ case RXRPC_CALL_SERVER_RECV_REQUEST: {
unsigned long timo = READ_ONCE(call->next_req_timo);
unsigned long now, expect_req_by;
@@ -573,18 +595,15 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_reduce_call_timer(call, expect_req_by, now,
rxrpc_timer_set_for_idle);
}
+ break;
}
- /* Received data implicitly ACKs all of the request packets we sent
- * when we're acting as a client.
- */
- if ((state == RXRPC_CALL_CLIENT_SEND_REQUEST ||
- state == RXRPC_CALL_CLIENT_AWAIT_REPLY) &&
- !rxrpc_receiving_reply(call))
- goto out_notify;
+ default:
+ break;
+ }
if (!rxrpc_input_split_jumbo(call, skb)) {
- rxrpc_proto_abort("VLD", call, sp->hdr.seq);
+ rxrpc_proto_abort(call, sp->hdr.seq, rxrpc_badmsg_bad_jumbo);
goto out_notify;
}
skb = NULL;
@@ -765,7 +784,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
offset = sizeof(struct rxrpc_wire_header);
if (skb_copy_bits(skb, offset, &ack, sizeof(ack)) < 0)
- return rxrpc_proto_abort("XAK", call, 0);
+ return rxrpc_proto_abort(call, 0, rxrpc_badmsg_short_ack);
offset += sizeof(ack);
ack_serial = sp->hdr.serial;
@@ -845,7 +864,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
ioffset = offset + nr_acks + 3;
if (skb->len >= ioffset + sizeof(info) &&
skb_copy_bits(skb, ioffset, &info, sizeof(info)) < 0)
- return rxrpc_proto_abort("XAI", call, 0);
+ return rxrpc_proto_abort(call, 0, rxrpc_badmsg_short_ack_info);
if (nr_acks > 0)
skb_condense(skb);
@@ -868,10 +887,10 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_input_ackinfo(call, skb, &info);
if (first_soft_ack == 0)
- return rxrpc_proto_abort("AK0", call, 0);
+ return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_zero);
/* Ignore ACKs unless we are or have just been transmitting. */
- switch (READ_ONCE(call->state)) {
+ switch (__rxrpc_call_state(call)) {
case RXRPC_CALL_CLIENT_SEND_REQUEST:
case RXRPC_CALL_CLIENT_AWAIT_REPLY:
case RXRPC_CALL_SERVER_SEND_REPLY:
@@ -883,20 +902,20 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
if (before(hard_ack, call->acks_hard_ack) ||
after(hard_ack, call->tx_top))
- return rxrpc_proto_abort("AKW", call, 0);
+ return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_outside_window);
if (nr_acks > call->tx_top - hard_ack)
- return rxrpc_proto_abort("AKN", call, 0);
+ return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_sack_overflow);
if (after(hard_ack, call->acks_hard_ack)) {
if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) {
- rxrpc_end_tx_phase(call, false, "ETA");
+ rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ack);
return;
}
}
if (nr_acks > 0) {
if (offset > (int)skb->len - nr_acks)
- return rxrpc_proto_abort("XSA", call, 0);
+ return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_short_sack);
rxrpc_input_soft_acks(call, skb->data + offset, first_soft_ack,
nr_acks, &summary);
}
@@ -918,7 +937,7 @@ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb)
struct rxrpc_ack_summary summary = { 0 };
if (rxrpc_rotate_tx_window(call, call->tx_top, &summary))
- rxrpc_end_tx_phase(call, false, "ETL");
+ rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ackall);
}
/*
@@ -963,27 +982,23 @@ void rxrpc_input_call_packet(struct rxrpc_call *call, struct sk_buff *skb)
switch (sp->hdr.type) {
case RXRPC_PACKET_TYPE_DATA:
- rxrpc_input_data(call, skb);
- break;
+ return rxrpc_input_data(call, skb);
case RXRPC_PACKET_TYPE_ACK:
- rxrpc_input_ack(call, skb);
- break;
+ return rxrpc_input_ack(call, skb);
case RXRPC_PACKET_TYPE_BUSY:
/* Just ignore BUSY packets from the server; the retry and
* lifespan timers will take care of business. BUSY packets
* from the client don't make sense.
*/
- break;
+ return;
case RXRPC_PACKET_TYPE_ABORT:
- rxrpc_input_abort(call, skb);
- break;
+ return rxrpc_input_abort(call, skb);
case RXRPC_PACKET_TYPE_ACKALL:
- rxrpc_input_ackall(call, skb);
- break;
+ return rxrpc_input_ackall(call, skb);
default:
break;
@@ -998,24 +1013,18 @@ void rxrpc_input_call_packet(struct rxrpc_call *call, struct sk_buff *skb)
*/
void rxrpc_implicit_end_call(struct rxrpc_call *call, struct sk_buff *skb)
{
- struct rxrpc_connection *conn = call->conn;
-
- switch (READ_ONCE(call->state)) {
+ switch (__rxrpc_call_state(call)) {
case RXRPC_CALL_SERVER_AWAIT_ACK:
rxrpc_call_completed(call);
fallthrough;
case RXRPC_CALL_COMPLETE:
break;
default:
- if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, -ESHUTDOWN))
- rxrpc_send_abort_packet(call);
+ rxrpc_abort_call(call, 0, RX_CALL_DEAD, -ESHUTDOWN,
+ rxrpc_eproto_improper_term);
trace_rxrpc_improper_term(call);
break;
}
rxrpc_input_call_event(call, skb);
-
- spin_lock(&conn->bundle->channel_lock);
- __rxrpc_disconnect_call(conn, call);
- spin_unlock(&conn->bundle->channel_lock);
}
diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c
index 0eb8471bfc53..34353b6e584b 100644
--- a/net/rxrpc/insecure.c
+++ b/net/rxrpc/insecure.c
@@ -43,25 +43,17 @@ static void none_free_call_crypto(struct rxrpc_call *call)
}
static int none_respond_to_challenge(struct rxrpc_connection *conn,
- struct sk_buff *skb,
- u32 *_abort_code)
+ struct sk_buff *skb)
{
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-
- trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
- tracepoint_string("chall_none"));
- return -EPROTO;
+ return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO,
+ rxrpc_eproto_rxnull_challenge);
}
static int none_verify_response(struct rxrpc_connection *conn,
- struct sk_buff *skb,
- u32 *_abort_code)
+ struct sk_buff *skb)
{
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-
- trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
- tracepoint_string("resp_none"));
- return -EPROTO;
+ return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO,
+ rxrpc_eproto_rxnull_response);
}
static void none_clear(struct rxrpc_connection *conn)
diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c
index 1ad067d66fb6..9e9dfb2fc559 100644
--- a/net/rxrpc/io_thread.c
+++ b/net/rxrpc/io_thread.c
@@ -67,9 +67,31 @@ void rxrpc_error_report(struct sock *sk)
}
/*
+ * Directly produce an abort from a packet.
+ */
+bool rxrpc_direct_abort(struct sk_buff *skb, enum rxrpc_abort_reason why,
+ s32 abort_code, int err)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ trace_rxrpc_abort(0, why, sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+ abort_code, err);
+ skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
+ skb->priority = abort_code;
+ return false;
+}
+
+static bool rxrpc_bad_message(struct sk_buff *skb, enum rxrpc_abort_reason why)
+{
+ return rxrpc_direct_abort(skb, why, RX_PROTOCOL_ERROR, -EBADMSG);
+}
+
+#define just_discard true
+
+/*
* Process event packets targeted at a local endpoint.
*/
-static void rxrpc_input_version(struct rxrpc_local *local, struct sk_buff *skb)
+static bool rxrpc_input_version(struct rxrpc_local *local, struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
char v;
@@ -81,22 +103,21 @@ static void rxrpc_input_version(struct rxrpc_local *local, struct sk_buff *skb)
if (v == 0)
rxrpc_send_version_request(local, &sp->hdr, skb);
}
+
+ return true;
}
/*
* Extract the wire header from a packet and translate the byte order.
*/
-static noinline
-int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
+static bool rxrpc_extract_header(struct rxrpc_skb_priv *sp,
+ struct sk_buff *skb)
{
struct rxrpc_wire_header whdr;
/* dig out the RxRPC connection details */
- if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) {
- trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
- tracepoint_string("bad_hdr"));
- return -EBADMSG;
- }
+ if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0)
+ return rxrpc_bad_message(skb, rxrpc_badmsg_short_hdr);
memset(sp, 0, sizeof(*sp));
sp->hdr.epoch = ntohl(whdr.epoch);
@@ -110,7 +131,7 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
sp->hdr.securityIndex = whdr.securityIndex;
sp->hdr._rsvd = ntohs(whdr._rsvd);
sp->hdr.serviceId = ntohs(whdr.serviceId);
- return 0;
+ return true;
}
/*
@@ -130,28 +151,28 @@ static bool rxrpc_extract_abort(struct sk_buff *skb)
/*
* Process packets received on the local endpoint
*/
-static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
+static bool rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
{
struct rxrpc_connection *conn;
struct sockaddr_rxrpc peer_srx;
struct rxrpc_skb_priv *sp;
struct rxrpc_peer *peer = NULL;
struct sk_buff *skb = *_skb;
- int ret = 0;
+ bool ret = false;
skb_pull(skb, sizeof(struct udphdr));
sp = rxrpc_skb(skb);
/* dig out the RxRPC connection details */
- if (rxrpc_extract_header(sp, skb) < 0)
- goto bad_message;
+ if (!rxrpc_extract_header(sp, skb))
+ return just_discard;
if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
static int lose;
if ((lose++ & 7) == 7) {
trace_rxrpc_rx_lose(sp);
- return 0;
+ return just_discard;
}
}
@@ -160,28 +181,28 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
switch (sp->hdr.type) {
case RXRPC_PACKET_TYPE_VERSION:
if (rxrpc_to_client(sp))
- return 0;
- rxrpc_input_version(local, skb);
- return 0;
+ return just_discard;
+ return rxrpc_input_version(local, skb);
case RXRPC_PACKET_TYPE_BUSY:
if (rxrpc_to_server(sp))
- return 0;
+ return just_discard;
fallthrough;
case RXRPC_PACKET_TYPE_ACK:
case RXRPC_PACKET_TYPE_ACKALL:
if (sp->hdr.callNumber == 0)
- goto bad_message;
+ return rxrpc_bad_message(skb, rxrpc_badmsg_zero_call);
break;
case RXRPC_PACKET_TYPE_ABORT:
if (!rxrpc_extract_abort(skb))
- return 0; /* Just discard if malformed */
+ return just_discard; /* Just discard if malformed */
break;
case RXRPC_PACKET_TYPE_DATA:
- if (sp->hdr.callNumber == 0 ||
- sp->hdr.seq == 0)
- goto bad_message;
+ if (sp->hdr.callNumber == 0)
+ return rxrpc_bad_message(skb, rxrpc_badmsg_zero_call);
+ if (sp->hdr.seq == 0)
+ return rxrpc_bad_message(skb, rxrpc_badmsg_zero_seq);
/* Unshare the packet so that it can be modified for in-place
* decryption.
@@ -191,7 +212,7 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
if (!skb) {
rxrpc_eaten_skb(*_skb, rxrpc_skb_eaten_by_unshare_nomem);
*_skb = NULL;
- return 0;
+ return just_discard;
}
if (skb != *_skb) {
@@ -205,28 +226,28 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
case RXRPC_PACKET_TYPE_CHALLENGE:
if (rxrpc_to_server(sp))
- return 0;
+ return just_discard;
break;
case RXRPC_PACKET_TYPE_RESPONSE:
if (rxrpc_to_client(sp))
- return 0;
+ return just_discard;
break;
/* Packet types 9-11 should just be ignored. */
case RXRPC_PACKET_TYPE_PARAMS:
case RXRPC_PACKET_TYPE_10:
case RXRPC_PACKET_TYPE_11:
- return 0;
+ return just_discard;
default:
- goto bad_message;
+ return rxrpc_bad_message(skb, rxrpc_badmsg_unsupported_packet);
}
if (sp->hdr.serviceId == 0)
- goto bad_message;
+ return rxrpc_bad_message(skb, rxrpc_badmsg_zero_service);
if (WARN_ON_ONCE(rxrpc_extract_addr_from_skb(&peer_srx, skb) < 0))
- return true; /* Unsupported address type - discard. */
+ return just_discard; /* Unsupported address type. */
if (peer_srx.transport.family != local->srx.transport.family &&
(peer_srx.transport.family == AF_INET &&
@@ -234,7 +255,7 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
pr_warn_ratelimited("AF_RXRPC: Protocol mismatch %u not %u\n",
peer_srx.transport.family,
local->srx.transport.family);
- return true; /* Wrong address type - discard. */
+ return just_discard; /* Wrong address type. */
}
if (rxrpc_to_client(sp)) {
@@ -242,12 +263,8 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
conn = rxrpc_find_client_connection_rcu(local, &peer_srx, skb);
conn = rxrpc_get_connection_maybe(conn, rxrpc_conn_get_call_input);
rcu_read_unlock();
- if (!conn) {
- trace_rxrpc_abort(0, "NCC", sp->hdr.cid,
- sp->hdr.callNumber, sp->hdr.seq,
- RXKADINCONSISTENCY, EBADMSG);
- goto protocol_error;
- }
+ if (!conn)
+ return rxrpc_protocol_error(skb, rxrpc_eproto_no_client_conn);
ret = rxrpc_input_packet_on_conn(conn, &peer_srx, skb);
rxrpc_put_connection(conn, rxrpc_conn_put_call_input);
@@ -280,19 +297,7 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
ret = rxrpc_new_incoming_call(local, peer, NULL, &peer_srx, skb);
rxrpc_put_peer(peer, rxrpc_peer_put_input);
- if (ret < 0)
- goto reject_packet;
- return 0;
-
-bad_message:
- trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
- RX_PROTOCOL_ERROR, EBADMSG);
-protocol_error:
- skb->priority = RX_PROTOCOL_ERROR;
- skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
-reject_packet:
- rxrpc_reject_packet(local, skb);
- return 0;
+ return ret;
}
/*
@@ -306,21 +311,23 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn,
struct rxrpc_channel *chan;
struct rxrpc_call *call = NULL;
unsigned int channel;
+ bool ret;
if (sp->hdr.securityIndex != conn->security_ix)
- goto wrong_security;
+ return rxrpc_direct_abort(skb, rxrpc_eproto_wrong_security,
+ RXKADINCONSISTENCY, -EBADMSG);
if (sp->hdr.serviceId != conn->service_id) {
int old_id;
if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags))
- goto reupgrade;
+ return rxrpc_protocol_error(skb, rxrpc_eproto_reupgrade);
+
old_id = cmpxchg(&conn->service_id, conn->orig_service_id,
sp->hdr.serviceId);
-
if (old_id != conn->orig_service_id &&
old_id != sp->hdr.serviceId)
- goto reupgrade;
+ return rxrpc_protocol_error(skb, rxrpc_eproto_bad_upgrade);
}
if (after(sp->hdr.serial, conn->hi_serial))
@@ -336,19 +343,19 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn,
/* Ignore really old calls */
if (sp->hdr.callNumber < chan->last_call)
- return 0;
+ return just_discard;
if (sp->hdr.callNumber == chan->last_call) {
if (chan->call ||
sp->hdr.type == RXRPC_PACKET_TYPE_ABORT)
- return 0;
+ return just_discard;
/* For the previous service call, if completed successfully, we
* discard all further packets.
*/
if (rxrpc_conn_is_service(conn) &&
chan->last_type == RXRPC_PACKET_TYPE_ACK)
- return 0;
+ return just_discard;
/* But otherwise we need to retransmit the final packet from
* data cached in the connection record.
@@ -358,19 +365,17 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn,
sp->hdr.seq,
sp->hdr.serial,
sp->hdr.flags);
- rxrpc_input_conn_packet(conn, skb);
- return 0;
+ rxrpc_conn_retransmit_call(conn, skb, channel);
+ return just_discard;
}
- rcu_read_lock();
- call = rxrpc_try_get_call(rcu_dereference(chan->call),
- rxrpc_call_get_input);
- rcu_read_unlock();
+ call = rxrpc_try_get_call(chan->call, rxrpc_call_get_input);
if (sp->hdr.callNumber > chan->call_id) {
if (rxrpc_to_client(sp)) {
rxrpc_put_call(call, rxrpc_call_put_input);
- goto reject_packet;
+ return rxrpc_protocol_error(skb,
+ rxrpc_eproto_unexpected_implicit_end);
}
if (call) {
@@ -382,38 +387,14 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn,
if (!call) {
if (rxrpc_to_client(sp))
- goto bad_message;
- if (rxrpc_new_incoming_call(conn->local, conn->peer, conn,
- peer_srx, skb) == 0)
- return 0;
- goto reject_packet;
+ return rxrpc_protocol_error(skb, rxrpc_eproto_no_client_call);
+ return rxrpc_new_incoming_call(conn->local, conn->peer, conn,
+ peer_srx, skb);
}
- rxrpc_input_call_event(call, skb);
+ ret = rxrpc_input_call_event(call, skb);
rxrpc_put_call(call, rxrpc_call_put_input);
- return 0;
-
-wrong_security:
- trace_rxrpc_abort(0, "SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
- RXKADINCONSISTENCY, EBADMSG);
- skb->priority = RXKADINCONSISTENCY;
- goto post_abort;
-
-reupgrade:
- trace_rxrpc_abort(0, "UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
- RX_PROTOCOL_ERROR, EBADMSG);
- goto protocol_error;
-
-bad_message:
- trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
- RX_PROTOCOL_ERROR, EBADMSG);
-protocol_error:
- skb->priority = RX_PROTOCOL_ERROR;
-post_abort:
- skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
-reject_packet:
- rxrpc_reject_packet(conn->local, skb);
- return 0;
+ return ret;
}
/*
@@ -421,6 +402,7 @@ reject_packet:
*/
int rxrpc_io_thread(void *data)
{
+ struct rxrpc_connection *conn;
struct sk_buff_head rx_queue;
struct rxrpc_local *local = data;
struct rxrpc_call *call;
@@ -436,6 +418,24 @@ int rxrpc_io_thread(void *data)
for (;;) {
rxrpc_inc_stat(local->rxnet, stat_io_loop);
+ /* Deal with connections that want immediate attention. */
+ conn = list_first_entry_or_null(&local->conn_attend_q,
+ struct rxrpc_connection,
+ attend_link);
+ if (conn) {
+ spin_lock_bh(&local->lock);
+ list_del_init(&conn->attend_link);
+ spin_unlock_bh(&local->lock);
+
+ rxrpc_input_conn_event(conn, NULL);
+ rxrpc_put_connection(conn, rxrpc_conn_put_poke);
+ continue;
+ }
+
+ if (test_and_clear_bit(RXRPC_CLIENT_CONN_REAP_TIMER,
+ &local->client_conn_flags))
+ rxrpc_discard_expired_client_conns(local);
+
/* Deal with calls that want immediate attention. */
if ((call = list_first_entry_or_null(&local->call_attend_q,
struct rxrpc_call,
@@ -450,12 +450,17 @@ int rxrpc_io_thread(void *data)
continue;
}
+ if (!list_empty(&local->new_client_calls))
+ rxrpc_connect_client_calls(local);
+
/* Process received packets and errors. */
if ((skb = __skb_dequeue(&rx_queue))) {
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
switch (skb->mark) {
case RXRPC_SKB_MARK_PACKET:
skb->priority = 0;
- rxrpc_input_packet(local, &skb);
+ if (!rxrpc_input_packet(local, &skb))
+ rxrpc_reject_packet(local, skb);
trace_rxrpc_rx_done(skb->mark, skb->priority);
rxrpc_free_skb(skb, rxrpc_skb_put_input);
break;
@@ -463,6 +468,11 @@ int rxrpc_io_thread(void *data)
rxrpc_input_error(local, skb);
rxrpc_free_skb(skb, rxrpc_skb_put_error_report);
break;
+ case RXRPC_SKB_MARK_SERVICE_CONN_SECURED:
+ rxrpc_input_conn_event(sp->conn, skb);
+ rxrpc_put_connection(sp->conn, rxrpc_conn_put_poke);
+ rxrpc_free_skb(skb, rxrpc_skb_put_conn_secured);
+ break;
default:
WARN_ON_ONCE(1);
rxrpc_free_skb(skb, rxrpc_skb_put_unknown);
@@ -481,7 +491,11 @@ int rxrpc_io_thread(void *data)
set_current_state(TASK_INTERRUPTIBLE);
should_stop = kthread_should_stop();
if (!skb_queue_empty(&local->rx_queue) ||
- !list_empty(&local->call_attend_q)) {
+ !list_empty(&local->call_attend_q) ||
+ !list_empty(&local->conn_attend_q) ||
+ !list_empty(&local->new_client_calls) ||
+ test_bit(RXRPC_CLIENT_CONN_REAP_TIMER,
+ &local->client_conn_flags)) {
__set_current_state(TASK_RUNNING);
continue;
}
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 270b63d8f37a..b8eaca5d9f22 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -82,31 +82,59 @@ static long rxrpc_local_cmp_key(const struct rxrpc_local *local,
}
}
+static void rxrpc_client_conn_reap_timeout(struct timer_list *timer)
+{
+ struct rxrpc_local *local =
+ container_of(timer, struct rxrpc_local, client_conn_reap_timer);
+
+ if (local->kill_all_client_conns &&
+ test_and_set_bit(RXRPC_CLIENT_CONN_REAP_TIMER, &local->client_conn_flags))
+ rxrpc_wake_up_io_thread(local);
+}
+
/*
* Allocate a new local endpoint.
*/
-static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet,
+static struct rxrpc_local *rxrpc_alloc_local(struct net *net,
const struct sockaddr_rxrpc *srx)
{
struct rxrpc_local *local;
+ u32 tmp;
local = kzalloc(sizeof(struct rxrpc_local), GFP_KERNEL);
if (local) {
refcount_set(&local->ref, 1);
atomic_set(&local->active_users, 1);
- local->rxnet = rxnet;
+ local->net = net;
+ local->rxnet = rxrpc_net(net);
INIT_HLIST_NODE(&local->link);
init_rwsem(&local->defrag_sem);
init_completion(&local->io_thread_ready);
skb_queue_head_init(&local->rx_queue);
+ INIT_LIST_HEAD(&local->conn_attend_q);
INIT_LIST_HEAD(&local->call_attend_q);
+
local->client_bundles = RB_ROOT;
spin_lock_init(&local->client_bundles_lock);
+ local->kill_all_client_conns = false;
+ INIT_LIST_HEAD(&local->idle_client_conns);
+ timer_setup(&local->client_conn_reap_timer,
+ rxrpc_client_conn_reap_timeout, 0);
+
spin_lock_init(&local->lock);
rwlock_init(&local->services_lock);
local->debug_id = atomic_inc_return(&rxrpc_debug_id);
memcpy(&local->srx, srx, sizeof(*srx));
local->srx.srx_service = 0;
+ idr_init(&local->conn_ids);
+ get_random_bytes(&tmp, sizeof(tmp));
+ tmp &= 0x3fffffff;
+ if (tmp == 0)
+ tmp = 1;
+ idr_set_cursor(&local->conn_ids, tmp);
+ INIT_LIST_HEAD(&local->new_client_calls);
+ spin_lock_init(&local->client_call_lock);
+
trace_rxrpc_local(local->debug_id, rxrpc_local_new, 1, 1);
}
@@ -248,7 +276,7 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net,
goto found;
}
- local = rxrpc_alloc_local(rxnet, srx);
+ local = rxrpc_alloc_local(net, srx);
if (!local)
goto nomem;
@@ -407,6 +435,7 @@ void rxrpc_destroy_local(struct rxrpc_local *local)
* local endpoint.
*/
rxrpc_purge_queue(&local->rx_queue);
+ rxrpc_purge_client_connections(local);
}
/*
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
index 5905530e2f33..a0319c040c25 100644
--- a/net/rxrpc/net_ns.c
+++ b/net/rxrpc/net_ns.c
@@ -10,15 +10,6 @@
unsigned int rxrpc_net_id;
-static void rxrpc_client_conn_reap_timeout(struct timer_list *timer)
-{
- struct rxrpc_net *rxnet =
- container_of(timer, struct rxrpc_net, client_conn_reap_timer);
-
- if (rxnet->live)
- rxrpc_queue_work(&rxnet->client_conn_reaper);
-}
-
static void rxrpc_service_conn_reap_timeout(struct timer_list *timer)
{
struct rxrpc_net *rxnet =
@@ -63,14 +54,6 @@ static __net_init int rxrpc_init_net(struct net *net)
rxrpc_service_conn_reap_timeout, 0);
atomic_set(&rxnet->nr_client_conns, 0);
- rxnet->kill_all_client_conns = false;
- spin_lock_init(&rxnet->client_conn_cache_lock);
- mutex_init(&rxnet->client_conn_discard_lock);
- INIT_LIST_HEAD(&rxnet->idle_client_conns);
- INIT_WORK(&rxnet->client_conn_reaper,
- rxrpc_discard_expired_client_conns);
- timer_setup(&rxnet->client_conn_reap_timer,
- rxrpc_client_conn_reap_timeout, 0);
INIT_HLIST_HEAD(&rxnet->local_endpoints);
mutex_init(&rxnet->local_mutex);
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 3d8c9f830ee0..a9746be29634 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -261,7 +261,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
rxrpc_tx_point_call_ack);
rxrpc_tx_backoff(call, ret);
- if (call->state < RXRPC_CALL_COMPLETE) {
+ if (!__rxrpc_call_is_complete(call)) {
if (ret < 0)
rxrpc_cancel_rtt_probe(call, serial, rtt_slot);
rxrpc_set_keepalive(call);
@@ -545,6 +545,62 @@ send_fragmentable:
}
/*
+ * Transmit a connection-level abort.
+ */
+void rxrpc_send_conn_abort(struct rxrpc_connection *conn)
+{
+ struct rxrpc_wire_header whdr;
+ struct msghdr msg;
+ struct kvec iov[2];
+ __be32 word;
+ size_t len;
+ u32 serial;
+ int ret;
+
+ msg.msg_name = &conn->peer->srx.transport;
+ msg.msg_namelen = conn->peer->srx.transport_len;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+
+ whdr.epoch = htonl(conn->proto.epoch);
+ whdr.cid = htonl(conn->proto.cid);
+ whdr.callNumber = 0;
+ whdr.seq = 0;
+ whdr.type = RXRPC_PACKET_TYPE_ABORT;
+ whdr.flags = conn->out_clientflag;
+ whdr.userStatus = 0;
+ whdr.securityIndex = conn->security_ix;
+ whdr._rsvd = 0;
+ whdr.serviceId = htons(conn->service_id);
+
+ word = htonl(conn->abort_code);
+
+ iov[0].iov_base = &whdr;
+ iov[0].iov_len = sizeof(whdr);
+ iov[1].iov_base = &word;
+ iov[1].iov_len = sizeof(word);
+
+ len = iov[0].iov_len + iov[1].iov_len;
+
+ serial = atomic_inc_return(&conn->serial);
+ whdr.serial = htonl(serial);
+
+ iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len);
+ ret = do_udp_sendmsg(conn->local->socket, &msg, len);
+ if (ret < 0) {
+ trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
+ rxrpc_tx_point_conn_abort);
+ _debug("sendmsg failed: %d", ret);
+ return;
+ }
+
+ trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_conn_abort);
+
+ conn->peer->last_tx_at = ktime_get_seconds();
+}
+
+/*
* Reject a packet through the local endpoint.
*/
void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
@@ -667,7 +723,7 @@ void rxrpc_send_keepalive(struct rxrpc_peer *peer)
static inline void rxrpc_instant_resend(struct rxrpc_call *call,
struct rxrpc_txbuf *txb)
{
- if (call->state < RXRPC_CALL_COMPLETE)
+ if (!__rxrpc_call_is_complete(call))
kdebug("resend");
}
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 4eecea2be307..8d7a715a0bb1 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -147,10 +147,10 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local,
* assess the MTU size for the network interface through which this peer is
* reached
*/
-static void rxrpc_assess_MTU_size(struct rxrpc_sock *rx,
+static void rxrpc_assess_MTU_size(struct rxrpc_local *local,
struct rxrpc_peer *peer)
{
- struct net *net = sock_net(&rx->sk);
+ struct net *net = local->net;
struct dst_entry *dst;
struct rtable *rt;
struct flowi fl;
@@ -236,11 +236,11 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp,
/*
* Initialise peer record.
*/
-static void rxrpc_init_peer(struct rxrpc_sock *rx, struct rxrpc_peer *peer,
+static void rxrpc_init_peer(struct rxrpc_local *local, struct rxrpc_peer *peer,
unsigned long hash_key)
{
peer->hash_key = hash_key;
- rxrpc_assess_MTU_size(rx, peer);
+ rxrpc_assess_MTU_size(local, peer);
peer->mtu = peer->if_mtu;
peer->rtt_last_req = ktime_get_real();
@@ -272,8 +272,7 @@ static void rxrpc_init_peer(struct rxrpc_sock *rx, struct rxrpc_peer *peer,
/*
* Set up a new peer.
*/
-static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx,
- struct rxrpc_local *local,
+static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local,
struct sockaddr_rxrpc *srx,
unsigned long hash_key,
gfp_t gfp)
@@ -285,7 +284,7 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx,
peer = rxrpc_alloc_peer(local, gfp, rxrpc_peer_new_client);
if (peer) {
memcpy(&peer->srx, srx, sizeof(*srx));
- rxrpc_init_peer(rx, peer, hash_key);
+ rxrpc_init_peer(local, peer, hash_key);
}
_leave(" = %p", peer);
@@ -304,14 +303,13 @@ static void rxrpc_free_peer(struct rxrpc_peer *peer)
* since we've already done a search in the list from the non-reentrant context
* (the data_ready handler) that is the only place we can add new peers.
*/
-void rxrpc_new_incoming_peer(struct rxrpc_sock *rx, struct rxrpc_local *local,
- struct rxrpc_peer *peer)
+void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer)
{
struct rxrpc_net *rxnet = local->rxnet;
unsigned long hash_key;
hash_key = rxrpc_peer_hash_key(local, &peer->srx);
- rxrpc_init_peer(rx, peer, hash_key);
+ rxrpc_init_peer(local, peer, hash_key);
spin_lock(&rxnet->peer_hash_lock);
hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key);
@@ -322,8 +320,7 @@ void rxrpc_new_incoming_peer(struct rxrpc_sock *rx, struct rxrpc_local *local,
/*
* obtain a remote transport endpoint for the specified address
*/
-struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx,
- struct rxrpc_local *local,
+struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
struct sockaddr_rxrpc *srx, gfp_t gfp)
{
struct rxrpc_peer *peer, *candidate;
@@ -343,7 +340,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx,
/* The peer is not yet present in hash - create a candidate
* for a new record and then redo the search.
*/
- candidate = rxrpc_create_peer(rx, local, srx, hash_key, gfp);
+ candidate = rxrpc_create_peer(local, srx, hash_key, gfp);
if (!candidate) {
_leave(" = NULL [nomem]");
return NULL;
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index 3a59591ec061..750158a085cd 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -12,13 +12,13 @@
static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = {
[RXRPC_CONN_UNUSED] = "Unused ",
+ [RXRPC_CONN_CLIENT_UNSECURED] = "ClUnsec ",
[RXRPC_CONN_CLIENT] = "Client ",
[RXRPC_CONN_SERVICE_PREALLOC] = "SvPrealc",
[RXRPC_CONN_SERVICE_UNSECURED] = "SvUnsec ",
[RXRPC_CONN_SERVICE_CHALLENGING] = "SvChall ",
[RXRPC_CONN_SERVICE] = "SvSecure",
- [RXRPC_CONN_REMOTELY_ABORTED] = "RmtAbort",
- [RXRPC_CONN_LOCALLY_ABORTED] = "LocAbort",
+ [RXRPC_CONN_ABORTED] = "Aborted ",
};
/*
@@ -51,6 +51,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
struct rxrpc_local *local;
struct rxrpc_call *call;
struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+ enum rxrpc_call_state state;
unsigned long timeout = 0;
rxrpc_seq_t acks_hard_ack;
char lbuff[50], rbuff[50];
@@ -75,7 +76,8 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
sprintf(rbuff, "%pISpc", &call->dest_srx.transport);
- if (call->state != RXRPC_CALL_SERVER_PREALLOC) {
+ state = rxrpc_call_state(call);
+ if (state != RXRPC_CALL_SERVER_PREALLOC) {
timeout = READ_ONCE(call->expect_rx_by);
timeout -= jiffies;
}
@@ -92,7 +94,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
call->call_id,
rxrpc_is_service_call(call) ? "Svc" : "Clt",
refcount_read(&call->ref),
- rxrpc_call_states[call->state],
+ rxrpc_call_states[state],
call->abort_code,
call->debug_id,
acks_hard_ack, READ_ONCE(call->tx_top) - acks_hard_ack,
@@ -143,6 +145,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
{
struct rxrpc_connection *conn;
struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+ const char *state;
char lbuff[50], rbuff[50];
if (v == &rxnet->conn_proc_list) {
@@ -163,9 +166,11 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
}
sprintf(lbuff, "%pISpc", &conn->local->srx.transport);
-
sprintf(rbuff, "%pISpc", &conn->peer->srx.transport);
print:
+ state = rxrpc_is_conn_aborted(conn) ?
+ rxrpc_call_completions[conn->completion] :
+ rxrpc_conn_states[conn->state];
seq_printf(seq,
"UDP %-47.47s %-47.47s %4x %08x %s %3u %3d"
" %s %08x %08x %08x %08x %08x %08x %08x\n",
@@ -176,7 +181,7 @@ print:
rxrpc_conn_is_service(conn) ? "Svc" : "Clt",
refcount_read(&conn->ref),
atomic_read(&conn->active),
- rxrpc_conn_states[conn->state],
+ state,
key_serial(conn->key),
atomic_read(&conn->serial),
conn->hi_serial,
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 36b25d003cf0..dd54ceee7bcc 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -59,85 +59,6 @@ void rxrpc_notify_socket(struct rxrpc_call *call)
}
/*
- * Transition a call to the complete state.
- */
-bool __rxrpc_set_call_completion(struct rxrpc_call *call,
- enum rxrpc_call_completion compl,
- u32 abort_code,
- int error)
-{
- if (call->state < RXRPC_CALL_COMPLETE) {
- call->abort_code = abort_code;
- call->error = error;
- call->completion = compl;
- call->state = RXRPC_CALL_COMPLETE;
- trace_rxrpc_call_complete(call);
- wake_up(&call->waitq);
- rxrpc_notify_socket(call);
- return true;
- }
- return false;
-}
-
-bool rxrpc_set_call_completion(struct rxrpc_call *call,
- enum rxrpc_call_completion compl,
- u32 abort_code,
- int error)
-{
- bool ret = false;
-
- if (call->state < RXRPC_CALL_COMPLETE) {
- write_lock(&call->state_lock);
- ret = __rxrpc_set_call_completion(call, compl, abort_code, error);
- write_unlock(&call->state_lock);
- }
- return ret;
-}
-
-/*
- * Record that a call successfully completed.
- */
-bool __rxrpc_call_completed(struct rxrpc_call *call)
-{
- return __rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0);
-}
-
-bool rxrpc_call_completed(struct rxrpc_call *call)
-{
- bool ret = false;
-
- if (call->state < RXRPC_CALL_COMPLETE) {
- write_lock(&call->state_lock);
- ret = __rxrpc_call_completed(call);
- write_unlock(&call->state_lock);
- }
- return ret;
-}
-
-/*
- * Record that a call is locally aborted.
- */
-bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call,
- rxrpc_seq_t seq, u32 abort_code, int error)
-{
- trace_rxrpc_abort(call->debug_id, why, call->cid, call->call_id, seq,
- abort_code, error);
- return __rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED,
- abort_code, error);
-}
-
-bool rxrpc_abort_call(const char *why, struct rxrpc_call *call,
- rxrpc_seq_t seq, u32 abort_code, int error)
-{
- bool ret;
-
- write_lock(&call->state_lock);
- ret = __rxrpc_abort_call(why, call, seq, abort_code, error);
- write_unlock(&call->state_lock);
- return ret;
-}
-
-/*
* Pass a call terminating message to userspace.
*/
static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg)
@@ -168,7 +89,7 @@ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg)
ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4, &tmp);
break;
default:
- pr_err("Invalid terminal call state %u\n", call->state);
+ pr_err("Invalid terminal call state %u\n", call->completion);
BUG();
break;
}
@@ -180,41 +101,6 @@ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg)
}
/*
- * End the packet reception phase.
- */
-static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
-{
- rxrpc_seq_t whigh = READ_ONCE(call->rx_highest_seq);
-
- _enter("%d,%s", call->debug_id, rxrpc_call_states[call->state]);
-
- trace_rxrpc_receive(call, rxrpc_receive_end, 0, whigh);
-
- if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY)
- rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_terminal_ack);
-
- write_lock(&call->state_lock);
-
- switch (call->state) {
- case RXRPC_CALL_CLIENT_RECV_REPLY:
- __rxrpc_call_completed(call);
- write_unlock(&call->state_lock);
- break;
-
- case RXRPC_CALL_SERVER_RECV_REQUEST:
- call->state = RXRPC_CALL_SERVER_ACK_REQUEST;
- call->expect_req_by = jiffies + MAX_JIFFY_OFFSET;
- write_unlock(&call->state_lock);
- rxrpc_propose_delay_ACK(call, serial,
- rxrpc_propose_ack_processing_op);
- break;
- default:
- write_unlock(&call->state_lock);
- break;
- }
-}
-
-/*
* Discard a packet we've used up and advance the Rx window by one.
*/
static void rxrpc_rotate_rx_window(struct rxrpc_call *call)
@@ -244,10 +130,9 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call)
trace_rxrpc_receive(call, last ? rxrpc_receive_rotate_last : rxrpc_receive_rotate,
serial, call->rx_consumed);
- if (last) {
- rxrpc_end_rx_phase(call, serial);
- return;
- }
+
+ if (last)
+ set_bit(RXRPC_CALL_RECVMSG_READ_ALL, &call->flags);
/* Check to see if there's an ACK that needs sending. */
acked = atomic_add_return(call->rx_consumed - old_consumed,
@@ -272,7 +157,8 @@ static int rxrpc_verify_data(struct rxrpc_call *call, struct sk_buff *skb)
/*
* Deliver messages to a call. This keeps processing packets until the buffer
* is filled and we find either more DATA (returns 0) or the end of the DATA
- * (returns 1). If more packets are required, it returns -EAGAIN.
+ * (returns 1). If more packets are required, it returns -EAGAIN and if the
+ * call has failed it returns -EIO.
*/
static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
struct msghdr *msg, struct iov_iter *iter,
@@ -288,7 +174,13 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
rx_pkt_offset = call->rx_pkt_offset;
rx_pkt_len = call->rx_pkt_len;
- if (call->state >= RXRPC_CALL_SERVER_ACK_REQUEST) {
+ if (rxrpc_call_has_failed(call)) {
+ seq = lower_32_bits(atomic64_read(&call->ackr_window)) - 1;
+ ret = -EIO;
+ goto done;
+ }
+
+ if (test_bit(RXRPC_CALL_RECVMSG_READ_ALL, &call->flags)) {
seq = lower_32_bits(atomic64_read(&call->ackr_window)) - 1;
ret = 1;
goto done;
@@ -312,14 +204,15 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
if (rx_pkt_offset == 0) {
ret2 = rxrpc_verify_data(call, skb);
- rx_pkt_offset = sp->offset;
- rx_pkt_len = sp->len;
trace_rxrpc_recvdata(call, rxrpc_recvmsg_next, seq,
- rx_pkt_offset, rx_pkt_len, ret2);
+ sp->offset, sp->len, ret2);
if (ret2 < 0) {
+ kdebug("verify = %d", ret2);
ret = ret2;
goto out;
}
+ rx_pkt_offset = sp->offset;
+ rx_pkt_len = sp->len;
} else {
trace_rxrpc_recvdata(call, rxrpc_recvmsg_cont, seq,
rx_pkt_offset, rx_pkt_len, 0);
@@ -388,13 +281,14 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
struct rxrpc_call *call;
struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
struct list_head *l;
+ unsigned int call_debug_id = 0;
size_t copied = 0;
long timeo;
int ret;
DEFINE_WAIT(wait);
- trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_enter, 0);
+ trace_rxrpc_recvmsg(0, rxrpc_recvmsg_enter, 0);
if (flags & (MSG_OOB | MSG_TRUNC))
return -EOPNOTSUPP;
@@ -431,7 +325,7 @@ try_again:
if (list_empty(&rx->recvmsg_q)) {
if (signal_pending(current))
goto wait_interrupted;
- trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_wait, 0);
+ trace_rxrpc_recvmsg(0, rxrpc_recvmsg_wait, 0);
timeo = schedule_timeout(timeo);
}
finish_wait(sk_sleep(&rx->sk), &wait);
@@ -450,7 +344,8 @@ try_again:
rxrpc_get_call(call, rxrpc_call_get_recvmsg);
write_unlock(&rx->recvmsg_lock);
- trace_rxrpc_recvmsg(call, rxrpc_recvmsg_dequeue, 0);
+ call_debug_id = call->debug_id;
+ trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_dequeue, 0);
/* We're going to drop the socket lock, so we need to lock the call
* against interference by sendmsg.
@@ -492,36 +387,36 @@ try_again:
msg->msg_namelen = len;
}
- switch (READ_ONCE(call->state)) {
- case RXRPC_CALL_CLIENT_RECV_REPLY:
- case RXRPC_CALL_SERVER_RECV_REQUEST:
- case RXRPC_CALL_SERVER_ACK_REQUEST:
- ret = rxrpc_recvmsg_data(sock, call, msg, &msg->msg_iter, len,
- flags, &copied);
- if (ret == -EAGAIN)
- ret = 0;
-
- if (!skb_queue_empty(&call->recvmsg_queue))
- rxrpc_notify_socket(call);
- break;
- default:
+ ret = rxrpc_recvmsg_data(sock, call, msg, &msg->msg_iter, len,
+ flags, &copied);
+ if (ret == -EAGAIN)
ret = 0;
- break;
- }
-
+ if (ret == -EIO)
+ goto call_failed;
if (ret < 0)
goto error_unlock_call;
- if (call->state == RXRPC_CALL_COMPLETE) {
- ret = rxrpc_recvmsg_term(call, msg);
- if (ret < 0)
- goto error_unlock_call;
- if (!(flags & MSG_PEEK))
- rxrpc_release_call(rx, call);
- msg->msg_flags |= MSG_EOR;
- ret = 1;
- }
+ if (rxrpc_call_is_complete(call) &&
+ skb_queue_empty(&call->recvmsg_queue))
+ goto call_complete;
+ if (rxrpc_call_has_failed(call))
+ goto call_failed;
+ rxrpc_notify_socket(call);
+ goto not_yet_complete;
+
+call_failed:
+ rxrpc_purge_queue(&call->recvmsg_queue);
+call_complete:
+ ret = rxrpc_recvmsg_term(call, msg);
+ if (ret < 0)
+ goto error_unlock_call;
+ if (!(flags & MSG_PEEK))
+ rxrpc_release_call(rx, call);
+ msg->msg_flags |= MSG_EOR;
+ ret = 1;
+
+not_yet_complete:
if (ret == 0)
msg->msg_flags |= MSG_MORE;
else
@@ -531,7 +426,7 @@ try_again:
error_unlock_call:
mutex_unlock(&call->user_mutex);
rxrpc_put_call(call, rxrpc_call_put_recvmsg);
- trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, ret);
+ trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_return, ret);
return ret;
error_requeue_call:
@@ -539,14 +434,14 @@ error_requeue_call:
write_lock(&rx->recvmsg_lock);
list_add(&call->recvmsg_link, &rx->recvmsg_q);
write_unlock(&rx->recvmsg_lock);
- trace_rxrpc_recvmsg(call, rxrpc_recvmsg_requeue, 0);
+ trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_requeue, 0);
} else {
rxrpc_put_call(call, rxrpc_call_put_recvmsg);
}
error_no_call:
release_sock(&rx->sk);
error_trace:
- trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, ret);
+ trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_return, ret);
return ret;
wait_interrupted:
@@ -584,49 +479,34 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
size_t offset = 0;
int ret;
- _enter("{%d,%s},%zu,%d",
- call->debug_id, rxrpc_call_states[call->state],
- *_len, want_more);
-
- ASSERTCMP(call->state, !=, RXRPC_CALL_SERVER_SECURING);
+ _enter("{%d},%zu,%d", call->debug_id, *_len, want_more);
mutex_lock(&call->user_mutex);
- switch (READ_ONCE(call->state)) {
- case RXRPC_CALL_CLIENT_RECV_REPLY:
- case RXRPC_CALL_SERVER_RECV_REQUEST:
- case RXRPC_CALL_SERVER_ACK_REQUEST:
- ret = rxrpc_recvmsg_data(sock, call, NULL, iter,
- *_len, 0, &offset);
- *_len -= offset;
- if (ret < 0)
- goto out;
-
- /* We can only reach here with a partially full buffer if we
- * have reached the end of the data. We must otherwise have a
- * full buffer or have been given -EAGAIN.
- */
- if (ret == 1) {
- if (iov_iter_count(iter) > 0)
- goto short_data;
- if (!want_more)
- goto read_phase_complete;
- ret = 0;
- goto out;
- }
-
- if (!want_more)
- goto excess_data;
+ ret = rxrpc_recvmsg_data(sock, call, NULL, iter, *_len, 0, &offset);
+ *_len -= offset;
+ if (ret == -EIO)
+ goto call_failed;
+ if (ret < 0)
goto out;
- case RXRPC_CALL_COMPLETE:
- goto call_complete;
-
- default:
- ret = -EINPROGRESS;
+ /* We can only reach here with a partially full buffer if we have
+ * reached the end of the data. We must otherwise have a full buffer
+ * or have been given -EAGAIN.
+ */
+ if (ret == 1) {
+ if (iov_iter_count(iter) > 0)
+ goto short_data;
+ if (!want_more)
+ goto read_phase_complete;
+ ret = 0;
goto out;
}
+ if (!want_more)
+ goto excess_data;
+ goto out;
+
read_phase_complete:
ret = 1;
out:
@@ -637,14 +517,18 @@ out:
return ret;
short_data:
- trace_rxrpc_rx_eproto(call, 0, tracepoint_string("short_data"));
+ trace_rxrpc_abort(call->debug_id, rxrpc_recvmsg_short_data,
+ call->cid, call->call_id, call->rx_consumed,
+ 0, -EBADMSG);
ret = -EBADMSG;
goto out;
excess_data:
- trace_rxrpc_rx_eproto(call, 0, tracepoint_string("excess_data"));
+ trace_rxrpc_abort(call->debug_id, rxrpc_recvmsg_excess_data,
+ call->cid, call->call_id, call->rx_consumed,
+ 0, -EMSGSIZE);
ret = -EMSGSIZE;
goto out;
-call_complete:
+call_failed:
*_abort = call->abort_code;
ret = call->error;
if (call->completion == RXRPC_CALL_SUCCEEDED) {
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index d1233720e05f..1bf571a66e02 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -411,18 +411,15 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_crypt iv;
struct scatterlist sg[16];
- bool aborted;
u32 data_size, buf;
u16 check;
int ret;
_enter("");
- if (sp->len < 8) {
- aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_hdr", "V1H",
- RXKADSEALEDINCON);
- goto protocol_error;
- }
+ if (sp->len < 8)
+ return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON,
+ rxkad_abort_1_short_header);
/* Decrypt the skbuff in-place. TODO: We really want to decrypt
* directly into the target buffer.
@@ -442,11 +439,9 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
skcipher_request_zero(req);
/* Extract the decrypted packet length */
- if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) {
- aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_len", "XV1",
- RXKADDATALEN);
- goto protocol_error;
- }
+ if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0)
+ return rxrpc_abort_eproto(call, skb, RXKADDATALEN,
+ rxkad_abort_1_short_encdata);
sp->offset += sizeof(sechdr);
sp->len -= sizeof(sechdr);
@@ -456,26 +451,16 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
check = buf >> 16;
check ^= seq ^ call->call_id;
check &= 0xffff;
- if (check != 0) {
- aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_check", "V1C",
- RXKADSEALEDINCON);
- goto protocol_error;
- }
-
- if (data_size > sp->len) {
- aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_datalen", "V1L",
- RXKADDATALEN);
- goto protocol_error;
- }
+ if (check != 0)
+ return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON,
+ rxkad_abort_1_short_check);
+ if (data_size > sp->len)
+ return rxrpc_abort_eproto(call, skb, RXKADDATALEN,
+ rxkad_abort_1_short_data);
sp->len = data_size;
_leave(" = 0 [dlen=%x]", data_size);
return 0;
-
-protocol_error:
- if (aborted)
- rxrpc_send_abort_packet(call);
- return -EPROTO;
}
/*
@@ -490,18 +475,15 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_crypt iv;
struct scatterlist _sg[4], *sg;
- bool aborted;
u32 data_size, buf;
u16 check;
int nsg, ret;
_enter(",{%d}", sp->len);
- if (sp->len < 8) {
- aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_hdr", "V2H",
- RXKADSEALEDINCON);
- goto protocol_error;
- }
+ if (sp->len < 8)
+ return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON,
+ rxkad_abort_2_short_header);
/* Decrypt the skbuff in-place. TODO: We really want to decrypt
* directly into the target buffer.
@@ -513,7 +495,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
} else {
sg = kmalloc_array(nsg, sizeof(*sg), GFP_NOIO);
if (!sg)
- goto nomem;
+ return -ENOMEM;
}
sg_init_table(sg, nsg);
@@ -537,11 +519,9 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
kfree(sg);
/* Extract the decrypted packet length */
- if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) {
- aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_len", "XV2",
- RXKADDATALEN);
- goto protocol_error;
- }
+ if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0)
+ return rxrpc_abort_eproto(call, skb, RXKADDATALEN,
+ rxkad_abort_2_short_len);
sp->offset += sizeof(sechdr);
sp->len -= sizeof(sechdr);
@@ -551,30 +531,17 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
check = buf >> 16;
check ^= seq ^ call->call_id;
check &= 0xffff;
- if (check != 0) {
- aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_check", "V2C",
- RXKADSEALEDINCON);
- goto protocol_error;
- }
+ if (check != 0)
+ return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON,
+ rxkad_abort_2_short_check);
- if (data_size > sp->len) {
- aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_datalen", "V2L",
- RXKADDATALEN);
- goto protocol_error;
- }
+ if (data_size > sp->len)
+ return rxrpc_abort_eproto(call, skb, RXKADDATALEN,
+ rxkad_abort_2_short_data);
sp->len = data_size;
_leave(" = 0 [dlen=%x]", data_size);
return 0;
-
-protocol_error:
- if (aborted)
- rxrpc_send_abort_packet(call);
- return -EPROTO;
-
-nomem:
- _leave(" = -ENOMEM");
- return -ENOMEM;
}
/*
@@ -590,7 +557,6 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb)
__be32 buf[2];
} crypto __aligned(8);
rxrpc_seq_t seq = sp->hdr.seq;
- bool aborted;
int ret;
u16 cksum;
u32 x, y;
@@ -627,9 +593,9 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb)
cksum = 1; /* zero checksums are not permitted */
if (cksum != sp->hdr.cksum) {
- aborted = rxrpc_abort_eproto(call, skb, "rxkad_csum", "VCK",
- RXKADSEALEDINCON);
- goto protocol_error;
+ ret = rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON,
+ rxkad_abort_bad_checksum);
+ goto out;
}
switch (call->conn->security_level) {
@@ -647,13 +613,9 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb)
break;
}
+out:
skcipher_request_free(req);
return ret;
-
-protocol_error:
- if (aborted)
- rxrpc_send_abort_packet(call);
- return -EPROTO;
}
/*
@@ -821,34 +783,30 @@ static int rxkad_encrypt_response(struct rxrpc_connection *conn,
* respond to a challenge packet
*/
static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
- struct sk_buff *skb,
- u32 *_abort_code)
+ struct sk_buff *skb)
{
const struct rxrpc_key_token *token;
struct rxkad_challenge challenge;
struct rxkad_response *resp;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- const char *eproto;
- u32 version, nonce, min_level, abort_code;
- int ret;
+ u32 version, nonce, min_level;
+ int ret = -EPROTO;
_enter("{%d,%x}", conn->debug_id, key_serial(conn->key));
- eproto = tracepoint_string("chall_no_key");
- abort_code = RX_PROTOCOL_ERROR;
if (!conn->key)
- goto protocol_error;
+ return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO,
+ rxkad_abort_chall_no_key);
- abort_code = RXKADEXPIRED;
ret = key_validate(conn->key);
if (ret < 0)
- goto other_error;
+ return rxrpc_abort_conn(conn, skb, RXKADEXPIRED, ret,
+ rxkad_abort_chall_key_expired);
- eproto = tracepoint_string("chall_short");
- abort_code = RXKADPACKETSHORT;
if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
&challenge, sizeof(challenge)) < 0)
- goto protocol_error;
+ return rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO,
+ rxkad_abort_chall_short);
version = ntohl(challenge.version);
nonce = ntohl(challenge.nonce);
@@ -856,15 +814,13 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
trace_rxrpc_rx_challenge(conn, sp->hdr.serial, version, nonce, min_level);
- eproto = tracepoint_string("chall_ver");
- abort_code = RXKADINCONSISTENCY;
if (version != RXKAD_VERSION)
- goto protocol_error;
+ return rxrpc_abort_conn(conn, skb, RXKADINCONSISTENCY, -EPROTO,
+ rxkad_abort_chall_version);
- abort_code = RXKADLEVELFAIL;
- ret = -EACCES;
if (conn->security_level < min_level)
- goto other_error;
+ return rxrpc_abort_conn(conn, skb, RXKADLEVELFAIL, -EACCES,
+ rxkad_abort_chall_level);
token = conn->key->payload.data[0];
@@ -893,13 +849,6 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
ret = rxkad_send_response(conn, &sp->hdr, resp, token->kad);
kfree(resp);
return ret;
-
-protocol_error:
- trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto);
- ret = -EPROTO;
-other_error:
- *_abort_code = abort_code;
- return ret;
}
/*
@@ -910,20 +859,15 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
struct sk_buff *skb,
void *ticket, size_t ticket_len,
struct rxrpc_crypt *_session_key,
- time64_t *_expiry,
- u32 *_abort_code)
+ time64_t *_expiry)
{
struct skcipher_request *req;
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_crypt iv, key;
struct scatterlist sg[1];
struct in_addr addr;
unsigned int life;
- const char *eproto;
time64_t issue, now;
bool little_endian;
- int ret;
- u32 abort_code;
u8 *p, *q, *name, *end;
_enter("{%d},{%x}", conn->debug_id, key_serial(server_key));
@@ -935,10 +879,9 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
memcpy(&iv, &server_key->payload.data[2], sizeof(iv));
- ret = -ENOMEM;
req = skcipher_request_alloc(server_key->payload.data[0], GFP_NOFS);
if (!req)
- goto temporary_error;
+ return -ENOMEM;
sg_init_one(&sg[0], ticket, ticket_len);
skcipher_request_set_callback(req, 0, NULL, NULL);
@@ -949,18 +892,21 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
p = ticket;
end = p + ticket_len;
-#define Z(field) \
- ({ \
- u8 *__str = p; \
- eproto = tracepoint_string("rxkad_bad_"#field); \
- q = memchr(p, 0, end - p); \
- if (!q || q - p > (field##_SZ)) \
- goto bad_ticket; \
- for (; p < q; p++) \
- if (!isprint(*p)) \
- goto bad_ticket; \
- p++; \
- __str; \
+#define Z(field, fieldl) \
+ ({ \
+ u8 *__str = p; \
+ q = memchr(p, 0, end - p); \
+ if (!q || q - p > field##_SZ) \
+ return rxrpc_abort_conn( \
+ conn, skb, RXKADBADTICKET, -EPROTO, \
+ rxkad_abort_resp_tkt_##fieldl); \
+ for (; p < q; p++) \
+ if (!isprint(*p)) \
+ return rxrpc_abort_conn( \
+ conn, skb, RXKADBADTICKET, -EPROTO, \
+ rxkad_abort_resp_tkt_##fieldl); \
+ p++; \
+ __str; \
})
/* extract the ticket flags */
@@ -969,20 +915,20 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
p++;
/* extract the authentication name */
- name = Z(ANAME);
+ name = Z(ANAME, aname);
_debug("KIV ANAME: %s", name);
/* extract the principal's instance */
- name = Z(INST);
+ name = Z(INST, inst);
_debug("KIV INST : %s", name);
/* extract the principal's authentication domain */
- name = Z(REALM);
+ name = Z(REALM, realm);
_debug("KIV REALM: %s", name);
- eproto = tracepoint_string("rxkad_bad_len");
if (end - p < 4 + 8 + 4 + 2)
- goto bad_ticket;
+ return rxrpc_abort_conn(conn, skb, RXKADBADTICKET, -EPROTO,
+ rxkad_abort_resp_tkt_short);
/* get the IPv4 address of the entity that requested the ticket */
memcpy(&addr, p, sizeof(addr));
@@ -1014,38 +960,23 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
_debug("KIV ISSUE: %llx [%llx]", issue, now);
/* check the ticket is in date */
- if (issue > now) {
- abort_code = RXKADNOAUTH;
- ret = -EKEYREJECTED;
- goto other_error;
- }
-
- if (issue < now - life) {
- abort_code = RXKADEXPIRED;
- ret = -EKEYEXPIRED;
- goto other_error;
- }
+ if (issue > now)
+ return rxrpc_abort_conn(conn, skb, RXKADNOAUTH, -EKEYREJECTED,
+ rxkad_abort_resp_tkt_future);
+ if (issue < now - life)
+ return rxrpc_abort_conn(conn, skb, RXKADEXPIRED, -EKEYEXPIRED,
+ rxkad_abort_resp_tkt_expired);
*_expiry = issue + life;
/* get the service name */
- name = Z(SNAME);
+ name = Z(SNAME, sname);
_debug("KIV SNAME: %s", name);
/* get the service instance name */
- name = Z(INST);
+ name = Z(INST, sinst);
_debug("KIV SINST: %s", name);
return 0;
-
-bad_ticket:
- trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto);
- abort_code = RXKADBADTICKET;
- ret = -EPROTO;
-other_error:
- *_abort_code = abort_code;
- return ret;
-temporary_error:
- return ret;
}
/*
@@ -1086,17 +1017,15 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn,
* verify a response
*/
static int rxkad_verify_response(struct rxrpc_connection *conn,
- struct sk_buff *skb,
- u32 *_abort_code)
+ struct sk_buff *skb)
{
struct rxkad_response *response;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_crypt session_key;
struct key *server_key;
- const char *eproto;
time64_t expiry;
void *ticket;
- u32 abort_code, version, kvno, ticket_len, level;
+ u32 version, kvno, ticket_len, level;
__be32 csum;
int ret, i;
@@ -1104,22 +1033,18 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
server_key = rxrpc_look_up_server_security(conn, skb, 0, 0);
if (IS_ERR(server_key)) {
- switch (PTR_ERR(server_key)) {
+ ret = PTR_ERR(server_key);
+ switch (ret) {
case -ENOKEY:
- abort_code = RXKADUNKNOWNKEY;
- break;
+ return rxrpc_abort_conn(conn, skb, RXKADUNKNOWNKEY, ret,
+ rxkad_abort_resp_nokey);
case -EKEYEXPIRED:
- abort_code = RXKADEXPIRED;
- break;
+ return rxrpc_abort_conn(conn, skb, RXKADEXPIRED, ret,
+ rxkad_abort_resp_key_expired);
default:
- abort_code = RXKADNOAUTH;
- break;
+ return rxrpc_abort_conn(conn, skb, RXKADNOAUTH, ret,
+ rxkad_abort_resp_key_rejected);
}
- trace_rxrpc_abort(0, "SVK",
- sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
- abort_code, PTR_ERR(server_key));
- *_abort_code = abort_code;
- return -EPROTO;
}
ret = -ENOMEM;
@@ -1127,11 +1052,12 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
if (!response)
goto temporary_error;
- eproto = tracepoint_string("rxkad_rsp_short");
- abort_code = RXKADPACKETSHORT;
if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
- response, sizeof(*response)) < 0)
+ response, sizeof(*response)) < 0) {
+ rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO,
+ rxkad_abort_resp_short);
goto protocol_error;
+ }
version = ntohl(response->version);
ticket_len = ntohl(response->ticket_len);
@@ -1139,20 +1065,23 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
trace_rxrpc_rx_response(conn, sp->hdr.serial, version, kvno, ticket_len);
- eproto = tracepoint_string("rxkad_rsp_ver");
- abort_code = RXKADINCONSISTENCY;
- if (version != RXKAD_VERSION)
+ if (version != RXKAD_VERSION) {
+ rxrpc_abort_conn(conn, skb, RXKADINCONSISTENCY, -EPROTO,
+ rxkad_abort_resp_version);
goto protocol_error;
+ }
- eproto = tracepoint_string("rxkad_rsp_tktlen");
- abort_code = RXKADTICKETLEN;
- if (ticket_len < 4 || ticket_len > MAXKRB5TICKETLEN)
+ if (ticket_len < 4 || ticket_len > MAXKRB5TICKETLEN) {
+ rxrpc_abort_conn(conn, skb, RXKADTICKETLEN, -EPROTO,
+ rxkad_abort_resp_tkt_len);
goto protocol_error;
+ }
- eproto = tracepoint_string("rxkad_rsp_unkkey");
- abort_code = RXKADUNKNOWNKEY;
- if (kvno >= RXKAD_TKT_TYPE_KERBEROS_V5)
+ if (kvno >= RXKAD_TKT_TYPE_KERBEROS_V5) {
+ rxrpc_abort_conn(conn, skb, RXKADUNKNOWNKEY, -EPROTO,
+ rxkad_abort_resp_unknown_tkt);
goto protocol_error;
+ }
/* extract the kerberos ticket and decrypt and decode it */
ret = -ENOMEM;
@@ -1160,15 +1089,15 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
if (!ticket)
goto temporary_error_free_resp;
- eproto = tracepoint_string("rxkad_tkt_short");
- abort_code = RXKADPACKETSHORT;
- ret = skb_copy_bits(skb, sizeof(struct rxrpc_wire_header) + sizeof(*response),
- ticket, ticket_len);
- if (ret < 0)
- goto temporary_error_free_ticket;
+ if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header) + sizeof(*response),
+ ticket, ticket_len) < 0) {
+ rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO,
+ rxkad_abort_resp_short_tkt);
+ goto protocol_error;
+ }
ret = rxkad_decrypt_ticket(conn, server_key, skb, ticket, ticket_len,
- &session_key, &expiry, _abort_code);
+ &session_key, &expiry);
if (ret < 0)
goto temporary_error_free_ticket;
@@ -1176,56 +1105,61 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
* response */
rxkad_decrypt_response(conn, response, &session_key);
- eproto = tracepoint_string("rxkad_rsp_param");
- abort_code = RXKADSEALEDINCON;
- if (ntohl(response->encrypted.epoch) != conn->proto.epoch)
- goto protocol_error_free;
- if (ntohl(response->encrypted.cid) != conn->proto.cid)
- goto protocol_error_free;
- if (ntohl(response->encrypted.securityIndex) != conn->security_ix)
+ if (ntohl(response->encrypted.epoch) != conn->proto.epoch ||
+ ntohl(response->encrypted.cid) != conn->proto.cid ||
+ ntohl(response->encrypted.securityIndex) != conn->security_ix) {
+ rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO,
+ rxkad_abort_resp_bad_param);
goto protocol_error_free;
+ }
+
csum = response->encrypted.checksum;
response->encrypted.checksum = 0;
rxkad_calc_response_checksum(response);
- eproto = tracepoint_string("rxkad_rsp_csum");
- if (response->encrypted.checksum != csum)
+ if (response->encrypted.checksum != csum) {
+ rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO,
+ rxkad_abort_resp_bad_checksum);
goto protocol_error_free;
+ }
- spin_lock(&conn->bundle->channel_lock);
for (i = 0; i < RXRPC_MAXCALLS; i++) {
- struct rxrpc_call *call;
u32 call_id = ntohl(response->encrypted.call_id[i]);
+ u32 counter = READ_ONCE(conn->channels[i].call_counter);
+
+ if (call_id > INT_MAX) {
+ rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO,
+ rxkad_abort_resp_bad_callid);
+ goto protocol_error_free;
+ }
- eproto = tracepoint_string("rxkad_rsp_callid");
- if (call_id > INT_MAX)
- goto protocol_error_unlock;
-
- eproto = tracepoint_string("rxkad_rsp_callctr");
- if (call_id < conn->channels[i].call_counter)
- goto protocol_error_unlock;
-
- eproto = tracepoint_string("rxkad_rsp_callst");
- if (call_id > conn->channels[i].call_counter) {
- call = rcu_dereference_protected(
- conn->channels[i].call,
- lockdep_is_held(&conn->bundle->channel_lock));
- if (call && call->state < RXRPC_CALL_COMPLETE)
- goto protocol_error_unlock;
+ if (call_id < counter) {
+ rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO,
+ rxkad_abort_resp_call_ctr);
+ goto protocol_error_free;
+ }
+
+ if (call_id > counter) {
+ if (conn->channels[i].call) {
+ rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO,
+ rxkad_abort_resp_call_state);
+ goto protocol_error_free;
+ }
conn->channels[i].call_counter = call_id;
}
}
- spin_unlock(&conn->bundle->channel_lock);
- eproto = tracepoint_string("rxkad_rsp_seq");
- abort_code = RXKADOUTOFSEQUENCE;
- if (ntohl(response->encrypted.inc_nonce) != conn->rxkad.nonce + 1)
+ if (ntohl(response->encrypted.inc_nonce) != conn->rxkad.nonce + 1) {
+ rxrpc_abort_conn(conn, skb, RXKADOUTOFSEQUENCE, -EPROTO,
+ rxkad_abort_resp_ooseq);
goto protocol_error_free;
+ }
- eproto = tracepoint_string("rxkad_rsp_level");
- abort_code = RXKADLEVELFAIL;
level = ntohl(response->encrypted.level);
- if (level > RXRPC_SECURITY_ENCRYPT)
+ if (level > RXRPC_SECURITY_ENCRYPT) {
+ rxrpc_abort_conn(conn, skb, RXKADLEVELFAIL, -EPROTO,
+ rxkad_abort_resp_level);
goto protocol_error_free;
+ }
conn->security_level = level;
/* create a key to hold the security data and expiration time - after
@@ -1240,15 +1174,11 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
_leave(" = 0");
return 0;
-protocol_error_unlock:
- spin_unlock(&conn->bundle->channel_lock);
protocol_error_free:
kfree(ticket);
protocol_error:
kfree(response);
- trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto);
key_put(server_key);
- *_abort_code = abort_code;
return -EPROTO;
temporary_error_free_ticket:
diff --git a/net/rxrpc/rxperf.c b/net/rxrpc/rxperf.c
index d33a109e846c..16dcabb71ebe 100644
--- a/net/rxrpc/rxperf.c
+++ b/net/rxrpc/rxperf.c
@@ -10,6 +10,8 @@
#include <linux/slab.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
+#define RXRPC_TRACE_ONLY_DEFINE_ENUMS
+#include <trace/events/rxrpc.h>
MODULE_DESCRIPTION("rxperf test server (afs)");
MODULE_AUTHOR("Red Hat, Inc.");
@@ -307,12 +309,14 @@ static void rxperf_deliver_to_call(struct work_struct *work)
case -EOPNOTSUPP:
abort_code = RXGEN_OPCODE;
rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
- abort_code, ret, "GOP");
+ abort_code, ret,
+ rxperf_abort_op_not_supported);
goto call_complete;
case -ENOTSUPP:
abort_code = RX_USER_ABORT;
rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
- abort_code, ret, "GUA");
+ abort_code, ret,
+ rxperf_abort_op_not_supported);
goto call_complete;
case -EIO:
pr_err("Call %u in bad state %u\n",
@@ -324,11 +328,13 @@ static void rxperf_deliver_to_call(struct work_struct *work)
case -ENOMEM:
case -EFAULT:
rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
- RXGEN_SS_UNMARSHAL, ret, "GUM");
+ RXGEN_SS_UNMARSHAL, ret,
+ rxperf_abort_unmarshal_error);
goto call_complete;
default:
rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
- RX_CALL_DEAD, ret, "GER");
+ RX_CALL_DEAD, ret,
+ rxperf_abort_general_error);
goto call_complete;
}
}
@@ -523,7 +529,8 @@ static int rxperf_process_call(struct rxperf_call *call)
if (n == -ENOMEM)
rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
- RXGEN_SS_MARSHAL, -ENOMEM, "GOM");
+ RXGEN_SS_MARSHAL, -ENOMEM,
+ rxperf_abort_oom);
return n;
}
diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c
index ab968f65a490..cb8dd1d3b1d4 100644
--- a/net/rxrpc/security.c
+++ b/net/rxrpc/security.c
@@ -97,38 +97,31 @@ found:
*/
int rxrpc_init_client_conn_security(struct rxrpc_connection *conn)
{
- const struct rxrpc_security *sec;
struct rxrpc_key_token *token;
struct key *key = conn->key;
- int ret;
+ int ret = 0;
_enter("{%d},{%x}", conn->debug_id, key_serial(key));
- if (!key)
- return 0;
-
- ret = key_validate(key);
- if (ret < 0)
- return ret;
-
for (token = key->payload.data[0]; token; token = token->next) {
- sec = rxrpc_security_lookup(token->security_index);
- if (sec)
+ if (token->security_index == conn->security->security_index)
goto found;
}
return -EKEYREJECTED;
found:
- conn->security = sec;
-
- ret = conn->security->init_connection_security(conn, token);
- if (ret < 0) {
- conn->security = &rxrpc_no_security;
- return ret;
+ mutex_lock(&conn->security_lock);
+ if (conn->state == RXRPC_CONN_CLIENT_UNSECURED) {
+ ret = conn->security->init_connection_security(conn, token);
+ if (ret == 0) {
+ spin_lock(&conn->state_lock);
+ if (conn->state == RXRPC_CONN_CLIENT_UNSECURED)
+ conn->state = RXRPC_CONN_CLIENT;
+ spin_unlock(&conn->state_lock);
+ }
}
-
- _leave(" = 0");
- return 0;
+ mutex_unlock(&conn->security_lock);
+ return ret;
}
/*
@@ -144,21 +137,15 @@ const struct rxrpc_security *rxrpc_get_incoming_security(struct rxrpc_sock *rx,
sec = rxrpc_security_lookup(sp->hdr.securityIndex);
if (!sec) {
- trace_rxrpc_abort(0, "SVS",
- sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
- RX_INVALID_OPERATION, EKEYREJECTED);
- skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
- skb->priority = RX_INVALID_OPERATION;
+ rxrpc_direct_abort(skb, rxrpc_abort_unsupported_security,
+ RX_INVALID_OPERATION, -EKEYREJECTED);
return NULL;
}
if (sp->hdr.securityIndex != RXRPC_SECURITY_NONE &&
!rx->securities) {
- trace_rxrpc_abort(0, "SVR",
- sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
- RX_INVALID_OPERATION, EKEYREJECTED);
- skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
- skb->priority = sec->no_key_abort;
+ rxrpc_direct_abort(skb, rxrpc_abort_no_service_key,
+ sec->no_key_abort, -EKEYREJECTED);
return NULL;
}
@@ -191,9 +178,9 @@ struct key *rxrpc_look_up_server_security(struct rxrpc_connection *conn,
sprintf(kdesc, "%u:%u",
sp->hdr.serviceId, sp->hdr.securityIndex);
- rcu_read_lock();
+ read_lock(&conn->local->services_lock);
- rx = rcu_dereference(conn->local->service);
+ rx = conn->local->service;
if (!rx)
goto out;
@@ -215,6 +202,6 @@ struct key *rxrpc_look_up_server_security(struct rxrpc_connection *conn,
}
out:
- rcu_read_unlock();
+ read_unlock(&conn->local->services_lock);
return key;
}
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index cde1e65f16b4..da49fcf1c456 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -18,6 +18,81 @@
#include "ar-internal.h"
/*
+ * Propose an abort to be made in the I/O thread.
+ */
+bool rxrpc_propose_abort(struct rxrpc_call *call, s32 abort_code, int error,
+ enum rxrpc_abort_reason why)
+{
+ _enter("{%d},%d,%d,%u", call->debug_id, abort_code, error, why);
+
+ if (!call->send_abort && !rxrpc_call_is_complete(call)) {
+ call->send_abort_why = why;
+ call->send_abort_err = error;
+ call->send_abort_seq = 0;
+ /* Request abort locklessly vs rxrpc_input_call_event(). */
+ smp_store_release(&call->send_abort, abort_code);
+ rxrpc_poke_call(call, rxrpc_call_poke_abort);
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Wait for a call to become connected. Interruption here doesn't cause the
+ * call to be aborted.
+ */
+static int rxrpc_wait_to_be_connected(struct rxrpc_call *call, long *timeo)
+{
+ DECLARE_WAITQUEUE(myself, current);
+ int ret = 0;
+
+ _enter("%d", call->debug_id);
+
+ if (rxrpc_call_state(call) != RXRPC_CALL_CLIENT_AWAIT_CONN)
+ return call->error;
+
+ add_wait_queue_exclusive(&call->waitq, &myself);
+
+ for (;;) {
+ ret = call->error;
+ if (ret < 0)
+ break;
+
+ switch (call->interruptibility) {
+ case RXRPC_INTERRUPTIBLE:
+ case RXRPC_PREINTERRUPTIBLE:
+ set_current_state(TASK_INTERRUPTIBLE);
+ break;
+ case RXRPC_UNINTERRUPTIBLE:
+ default:
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ break;
+ }
+ if (rxrpc_call_state(call) != RXRPC_CALL_CLIENT_AWAIT_CONN) {
+ ret = call->error;
+ break;
+ }
+ if ((call->interruptibility == RXRPC_INTERRUPTIBLE ||
+ call->interruptibility == RXRPC_PREINTERRUPTIBLE) &&
+ signal_pending(current)) {
+ ret = sock_intr_errno(*timeo);
+ break;
+ }
+ *timeo = schedule_timeout(*timeo);
+ }
+
+ remove_wait_queue(&call->waitq, &myself);
+ __set_current_state(TASK_RUNNING);
+
+ if (ret == 0 && rxrpc_call_is_complete(call))
+ ret = call->error;
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
* Return true if there's sufficient Tx queue space.
*/
static bool rxrpc_check_tx_space(struct rxrpc_call *call, rxrpc_seq_t *_tx_win)
@@ -39,7 +114,7 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
if (rxrpc_check_tx_space(call, NULL))
return 0;
- if (call->state >= RXRPC_CALL_COMPLETE)
+ if (rxrpc_call_is_complete(call))
return call->error;
if (signal_pending(current))
@@ -74,7 +149,7 @@ static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx,
if (rxrpc_check_tx_space(call, &tx_win))
return 0;
- if (call->state >= RXRPC_CALL_COMPLETE)
+ if (rxrpc_call_is_complete(call))
return call->error;
if (timeout == 0 &&
@@ -103,7 +178,7 @@ static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx,
if (rxrpc_check_tx_space(call, NULL))
return 0;
- if (call->state >= RXRPC_CALL_COMPLETE)
+ if (rxrpc_call_is_complete(call))
return call->error;
trace_rxrpc_txqueue(call, rxrpc_txqueue_wait);
@@ -168,7 +243,6 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
struct rxrpc_txbuf *txb,
rxrpc_notify_end_tx_t notify_end_tx)
{
- unsigned long now;
rxrpc_seq_t seq = txb->seq;
bool last = test_bit(RXRPC_TXBUF_LAST, &txb->flags), poke;
@@ -191,36 +265,10 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
poke = list_empty(&call->tx_sendmsg);
list_add_tail(&txb->call_link, &call->tx_sendmsg);
call->tx_prepared = seq;
+ if (last)
+ rxrpc_notify_end_tx(rx, call, notify_end_tx);
spin_unlock(&call->tx_lock);
- if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) {
- _debug("________awaiting reply/ACK__________");
- write_lock(&call->state_lock);
- switch (call->state) {
- case RXRPC_CALL_CLIENT_SEND_REQUEST:
- call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
- rxrpc_notify_end_tx(rx, call, notify_end_tx);
- break;
- case RXRPC_CALL_SERVER_ACK_REQUEST:
- call->state = RXRPC_CALL_SERVER_SEND_REPLY;
- now = jiffies;
- WRITE_ONCE(call->delay_ack_at, now + MAX_JIFFY_OFFSET);
- if (call->ackr_reason == RXRPC_ACK_DELAY)
- call->ackr_reason = 0;
- trace_rxrpc_timer(call, rxrpc_timer_init_for_send_reply, now);
- if (!last)
- break;
- fallthrough;
- case RXRPC_CALL_SERVER_SEND_REPLY:
- call->state = RXRPC_CALL_SERVER_AWAIT_ACK;
- rxrpc_notify_end_tx(rx, call, notify_end_tx);
- break;
- default:
- break;
- }
- write_unlock(&call->state_lock);
- }
-
if (poke)
rxrpc_poke_call(call, rxrpc_call_poke_start);
}
@@ -245,6 +293,16 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+ ret = rxrpc_wait_to_be_connected(call, &timeo);
+ if (ret < 0)
+ return ret;
+
+ if (call->conn->state == RXRPC_CONN_CLIENT_UNSECURED) {
+ ret = rxrpc_init_client_conn_security(call->conn);
+ if (ret < 0)
+ return ret;
+ }
+
/* this should be in poll */
sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
@@ -252,15 +310,20 @@ reload:
ret = -EPIPE;
if (sk->sk_shutdown & SEND_SHUTDOWN)
goto maybe_error;
- state = READ_ONCE(call->state);
+ state = rxrpc_call_state(call);
ret = -ESHUTDOWN;
if (state >= RXRPC_CALL_COMPLETE)
goto maybe_error;
ret = -EPROTO;
if (state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
state != RXRPC_CALL_SERVER_ACK_REQUEST &&
- state != RXRPC_CALL_SERVER_SEND_REPLY)
+ state != RXRPC_CALL_SERVER_SEND_REPLY) {
+ /* Request phase complete for this client call */
+ trace_rxrpc_abort(call->debug_id, rxrpc_sendmsg_late_send,
+ call->cid, call->call_id, call->rx_consumed,
+ 0, -EPROTO);
goto maybe_error;
+ }
ret = -EMSGSIZE;
if (call->tx_total_len != -1) {
@@ -329,7 +392,7 @@ reload:
/* check for the far side aborting the call or a network error
* occurring */
- if (call->state == RXRPC_CALL_COMPLETE)
+ if (rxrpc_call_is_complete(call))
goto call_terminated;
/* add the packet to the send queue if it's now full */
@@ -354,12 +417,9 @@ reload:
success:
ret = copied;
- if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE) {
- read_lock(&call->state_lock);
- if (call->error < 0)
- ret = call->error;
- read_unlock(&call->state_lock);
- }
+ if (rxrpc_call_is_complete(call) &&
+ call->error < 0)
+ ret = call->error;
out:
call->tx_pending = txb;
_leave(" = %d", ret);
@@ -543,7 +603,6 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
atomic_inc_return(&rxrpc_debug_id));
/* The socket is now unlocked */
- rxrpc_put_peer(cp.peer, rxrpc_peer_put_discard_tmp);
_leave(" = %p\n", call);
return call;
}
@@ -556,7 +615,6 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
__releases(&rx->sk.sk_lock.slock)
{
- enum rxrpc_call_state state;
struct rxrpc_call *call;
unsigned long now, j;
bool dropped_lock = false;
@@ -598,10 +656,10 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
return PTR_ERR(call);
/* ... and we have the call lock. */
ret = 0;
- if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE)
+ if (rxrpc_call_is_complete(call))
goto out_put_unlock;
} else {
- switch (READ_ONCE(call->state)) {
+ switch (rxrpc_call_state(call)) {
case RXRPC_CALL_UNINITIALISED:
case RXRPC_CALL_CLIENT_AWAIT_CONN:
case RXRPC_CALL_SERVER_PREALLOC:
@@ -655,17 +713,13 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
break;
}
- state = READ_ONCE(call->state);
- _debug("CALL %d USR %lx ST %d on CONN %p",
- call->debug_id, call->user_call_ID, state, call->conn);
-
- if (state >= RXRPC_CALL_COMPLETE) {
+ if (rxrpc_call_is_complete(call)) {
/* it's too late for this call */
ret = -ESHUTDOWN;
} else if (p.command == RXRPC_CMD_SEND_ABORT) {
+ rxrpc_propose_abort(call, p.abort_code, -ECONNABORTED,
+ rxrpc_abort_call_sendmsg);
ret = 0;
- if (rxrpc_abort_call("CMD", call, 0, p.abort_code, -ECONNABORTED))
- ret = rxrpc_send_abort_packet(call);
} else if (p.command != RXRPC_CMD_SEND_DATA) {
ret = -EINVAL;
} else {
@@ -705,34 +759,17 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
bool dropped_lock = false;
int ret;
- _enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]);
+ _enter("{%d},", call->debug_id);
ASSERTCMP(msg->msg_name, ==, NULL);
ASSERTCMP(msg->msg_control, ==, NULL);
mutex_lock(&call->user_mutex);
- _debug("CALL %d USR %lx ST %d on CONN %p",
- call->debug_id, call->user_call_ID, call->state, call->conn);
-
- switch (READ_ONCE(call->state)) {
- case RXRPC_CALL_CLIENT_SEND_REQUEST:
- case RXRPC_CALL_SERVER_ACK_REQUEST:
- case RXRPC_CALL_SERVER_SEND_REPLY:
- ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len,
- notify_end_tx, &dropped_lock);
- break;
- case RXRPC_CALL_COMPLETE:
- read_lock(&call->state_lock);
+ ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len,
+ notify_end_tx, &dropped_lock);
+ if (ret == -ESHUTDOWN)
ret = call->error;
- read_unlock(&call->state_lock);
- break;
- default:
- /* Request phase complete for this client call */
- trace_rxrpc_rx_eproto(call, 0, tracepoint_string("late_send"));
- ret = -EPROTO;
- break;
- }
if (!dropped_lock)
mutex_unlock(&call->user_mutex);
@@ -747,24 +784,20 @@ EXPORT_SYMBOL(rxrpc_kernel_send_data);
* @call: The call to be aborted
* @abort_code: The abort code to stick into the ABORT packet
* @error: Local error value
- * @why: 3-char string indicating why.
+ * @why: Indication as to why.
*
* Allow a kernel service to abort a call, if it's still in an abortable state
* and return true if the call was aborted, false if it was already complete.
*/
bool rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call,
- u32 abort_code, int error, const char *why)
+ u32 abort_code, int error, enum rxrpc_abort_reason why)
{
bool aborted;
- _enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why);
+ _enter("{%d},%d,%d,%u", call->debug_id, abort_code, error, why);
mutex_lock(&call->user_mutex);
-
- aborted = rxrpc_abort_call(why, call, 0, abort_code, error);
- if (aborted)
- rxrpc_send_abort_packet(call);
-
+ aborted = rxrpc_propose_abort(call, abort_code, error, why);
mutex_unlock(&call->user_mutex);
return aborted;
}
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
index 4b1b59da5c0b..4d15b6a6169c 100644
--- a/net/sched/act_ctinfo.c
+++ b/net/sched/act_ctinfo.c
@@ -93,7 +93,7 @@ TC_INDIRECT_SCOPE int tcf_ctinfo_act(struct sk_buff *skb,
cp = rcu_dereference_bh(ca->params);
tcf_lastuse_update(&ca->tcf_tm);
- bstats_update(&ca->tcf_bstats, skb);
+ tcf_action_update_bstats(&ca->common, skb);
action = READ_ONCE(ca->tcf_action);
wlen = skb_network_offset(skb);
@@ -212,8 +212,8 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
index = actparm->index;
err = tcf_idr_check_alloc(tn, &index, a, bind);
if (!err) {
- ret = tcf_idr_create(tn, index, est, a,
- &act_ctinfo_ops, bind, false, flags);
+ ret = tcf_idr_create_from_flags(tn, index, est, a,
+ &act_ctinfo_ops, bind, flags);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
index ff47ce4d3968..6b26bdb999d7 100644
--- a/net/sched/act_mpls.c
+++ b/net/sched/act_mpls.c
@@ -134,6 +134,11 @@ static int valid_label(const struct nlattr *attr,
{
const u32 *label = nla_data(attr);
+ if (nla_len(attr) != sizeof(*label)) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid MPLS label length");
+ return -EINVAL;
+ }
+
if (*label & ~MPLS_LABEL_MASK || *label == MPLS_LABEL_IMPLNULL) {
NL_SET_ERR_MSG_MOD(extack, "MPLS label out of range");
return -EINVAL;
@@ -145,7 +150,8 @@ static int valid_label(const struct nlattr *attr,
static const struct nla_policy mpls_policy[TCA_MPLS_MAX + 1] = {
[TCA_MPLS_PARMS] = NLA_POLICY_EXACT_LEN(sizeof(struct tc_mpls)),
[TCA_MPLS_PROTO] = { .type = NLA_U16 },
- [TCA_MPLS_LABEL] = NLA_POLICY_VALIDATE_FN(NLA_U32, valid_label),
+ [TCA_MPLS_LABEL] = NLA_POLICY_VALIDATE_FN(NLA_BINARY,
+ valid_label),
[TCA_MPLS_TC] = NLA_POLICY_RANGE(NLA_U8, 0, 7),
[TCA_MPLS_TTL] = NLA_POLICY_MIN(NLA_U8, 1),
[TCA_MPLS_BOS] = NLA_POLICY_RANGE(NLA_U8, 0, 1),
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index eb0e9458e722..6640e75eaa02 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -12,6 +12,7 @@
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/refcount.h>
+#include <linux/rcupdate.h>
#include <net/act_api.h>
#include <net/netlink.h>
#include <net/pkt_cls.h>
@@ -333,12 +334,13 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
struct tcindex_filter_result *r, struct nlattr **tb,
struct nlattr *est, u32 flags, struct netlink_ext_ack *extack)
{
- struct tcindex_filter_result new_filter_result, *old_r = r;
+ struct tcindex_filter_result new_filter_result;
struct tcindex_data *cp = NULL, *oldp;
struct tcindex_filter *f = NULL; /* make gcc behave */
struct tcf_result cr = {};
int err, balloc = 0;
struct tcf_exts e;
+ bool update_h = false;
err = tcf_exts_init(&e, net, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
if (err < 0)
@@ -402,7 +404,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
err = tcindex_filter_result_init(&new_filter_result, cp, net);
if (err < 0)
goto errout_alloc;
- if (old_r)
+ if (r)
cr = r->res;
err = -EBUSY;
@@ -456,10 +458,13 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
}
}
- if (cp->perfect)
+ if (cp->perfect) {
r = cp->perfect + handle;
- else
- r = tcindex_lookup(cp, handle) ? : &new_filter_result;
+ } else {
+ /* imperfect area is updated in-place using rcu */
+ update_h = !!tcindex_lookup(cp, handle);
+ r = &new_filter_result;
+ }
if (r == &new_filter_result) {
f = kzalloc(sizeof(*f), GFP_KERNEL);
@@ -479,21 +484,34 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
tcf_bind_filter(tp, &cr, base);
}
- if (old_r && old_r != r) {
- err = tcindex_filter_result_init(old_r, cp, net);
- if (err < 0) {
- kfree(f);
- goto errout_alloc;
- }
- }
-
oldp = p;
r->res = cr;
tcf_exts_change(&r->exts, &e);
rcu_assign_pointer(tp->root, cp);
- if (r == &new_filter_result) {
+ if (update_h) {
+ struct tcindex_filter __rcu **fp;
+ struct tcindex_filter *cf;
+
+ f->result.res = r->res;
+ tcf_exts_change(&f->result.exts, &r->exts);
+
+ /* imperfect area bucket */
+ fp = cp->h + (handle % cp->hash);
+
+ /* lookup the filter, guaranteed to exist */
+ for (cf = rcu_dereference_bh_rtnl(*fp); cf;
+ fp = &cf->next, cf = rcu_dereference_bh_rtnl(*fp))
+ if (cf->key == (u16)handle)
+ break;
+
+ f->next = cf->next;
+
+ cf = rcu_replace_pointer(*fp, f, 1);
+ tcf_exts_get_net(&cf->result.exts);
+ tcf_queue_work(&cf->rwork, tcindex_destroy_fexts_work);
+ } else if (r == &new_filter_result) {
struct tcindex_filter *nfp;
struct tcindex_filter __rcu **fp;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 2317db02c764..72d2c204d5f3 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1133,6 +1133,11 @@ skip:
return -ENOENT;
}
+ if (new && new->ops == &noqueue_qdisc_ops) {
+ NL_SET_ERR_MSG(extack, "Cannot assign noqueue to a class");
+ return -EINVAL;
+ }
+
err = cops->graft(parent, cl, new, &old, extack);
if (err)
return err;
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index f52255fea652..4a981ca90b0b 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -393,10 +393,13 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
result = tcf_classify(skb, NULL, fl, &res, true);
if (result < 0)
continue;
+ if (result == TC_ACT_SHOT)
+ goto done;
+
flow = (struct atm_flow_data *)res.class;
if (!flow)
flow = lookup_flow(sch, res.classid);
- goto done;
+ goto drop;
}
}
flow = NULL;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 6568e17c4c63..36db5f6782f2 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -230,6 +230,8 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
result = tcf_classify(skb, NULL, fl, &res, true);
if (!fl || result < 0)
goto fallback;
+ if (result == TC_ACT_SHOT)
+ return NULL;
cl = (void *)res.class;
if (!cl) {
@@ -250,8 +252,6 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
fallthrough;
- case TC_ACT_SHOT:
- return NULL;
case TC_ACT_RECLASSIFY:
return cbq_reclassify(skb, cl);
}
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index a661b062cca8..872d127c9db4 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -377,6 +377,7 @@ static int gred_offload_dump_stats(struct Qdisc *sch)
/* Even if driver returns failure adjust the stats - in case offload
* ended but driver still wants to adjust the values.
*/
+ sch_tree_lock(sch);
for (i = 0; i < MAX_DPs; i++) {
if (!table->tab[i])
continue;
@@ -393,6 +394,7 @@ static int gred_offload_dump_stats(struct Qdisc *sch)
sch->qstats.overlimits += hw_stats->stats.qstats[i].overlimits;
}
_bstats_update(&sch->bstats, bytes, packets);
+ sch_tree_unlock(sch);
kfree(hw_stats);
return ret;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index e5b4bbf3ce3d..92f2975b6a82 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -199,8 +199,14 @@ static unsigned long htb_search(struct Qdisc *sch, u32 handle)
{
return (unsigned long)htb_find(handle, sch);
}
+
+#define HTB_DIRECT ((struct htb_class *)-1L)
+
/**
* htb_classify - classify a packet into class
+ * @skb: the socket buffer
+ * @sch: the active queue discipline
+ * @qerr: pointer for returned status code
*
* It returns NULL if the packet should be dropped or -1 if the packet
* should be passed directly thru. In all other cases leaf class is returned.
@@ -211,8 +217,6 @@ static unsigned long htb_search(struct Qdisc *sch, u32 handle)
* have no valid leaf we try to use MAJOR:default leaf. It still unsuccessful
* then finish and return direct queue.
*/
-#define HTB_DIRECT ((struct htb_class *)-1L)
-
static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
int *qerr)
{
@@ -427,7 +431,10 @@ static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
while (cl->cmode == HTB_MAY_BORROW && p && mask) {
m = mask;
while (m) {
- int prio = ffz(~m);
+ unsigned int prio = ffz(~m);
+
+ if (WARN_ON_ONCE(prio >= ARRAY_SIZE(p->inner.clprio)))
+ break;
m &= ~(1 << prio);
if (p->inner.clprio[prio].feed.rb_node)
@@ -1545,7 +1552,7 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
struct tc_htb_qopt_offload offload_opt;
struct netdev_queue *dev_queue;
struct Qdisc *q = cl->leaf.q;
- struct Qdisc *old = NULL;
+ struct Qdisc *old;
int err;
if (cl->level)
@@ -1553,14 +1560,17 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
WARN_ON(!q);
dev_queue = htb_offload_get_queue(cl);
- old = htb_graft_helper(dev_queue, NULL);
- if (destroying)
- /* Before HTB is destroyed, the kernel grafts noop_qdisc to
- * all queues.
+ /* When destroying, caller qdisc_graft grafts the new qdisc and invokes
+ * qdisc_put for the qdisc being destroyed. htb_destroy_class_offload
+ * does not need to graft or qdisc_put the qdisc being destroyed.
+ */
+ if (!destroying) {
+ old = htb_graft_helper(dev_queue, NULL);
+ /* Last qdisc grafted should be the same as cl->leaf.q when
+ * calling htb_delete.
*/
- WARN_ON(!(old->flags & TCQ_F_BUILTIN));
- else
WARN_ON(old != q);
+ }
if (cl->parent) {
_bstats_update(&cl->parent->bstats_bias,
@@ -1577,10 +1587,12 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
};
err = htb_offload(qdisc_dev(sch), &offload_opt);
- if (!err || destroying)
- qdisc_put(old);
- else
- htb_graft_helper(dev_queue, old);
+ if (!destroying) {
+ if (!err)
+ qdisc_put(old);
+ else
+ htb_graft_helper(dev_queue, old);
+ }
if (last_child)
return err;
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 570389f6cdd7..c322a61eaeea 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -1700,6 +1700,7 @@ static void taprio_reset(struct Qdisc *sch)
int i;
hrtimer_cancel(&q->advance_timer);
+
if (q->qdiscs) {
for (i = 0; i < dev->num_tx_queues; i++)
if (q->qdiscs[i])
@@ -1720,6 +1721,7 @@ static void taprio_destroy(struct Qdisc *sch)
* happens in qdisc_create(), after taprio_init() has been called.
*/
hrtimer_cancel(&q->advance_timer);
+ qdisc_synchronize(sch);
taprio_disable_offload(dev, q, NULL);
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 59e653b528b1..6b95d3ba8fe1 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -73,6 +73,12 @@ int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest,
}
}
+ /* If somehow no addresses were found that can be used with this
+ * scope, it's an error.
+ */
+ if (list_empty(&dest->address_list))
+ error = -ENETUNREACH;
+
out:
if (error)
sctp_bind_addr_clean(dest);
diff --git a/net/sctp/diag.c b/net/sctp/diag.c
index a557009e9832..c3d6b92dd386 100644
--- a/net/sctp/diag.c
+++ b/net/sctp/diag.c
@@ -343,11 +343,9 @@ static int sctp_sock_filter(struct sctp_endpoint *ep, struct sctp_transport *tsp
struct sctp_comm_param *commp = p;
struct sock *sk = ep->base.sk;
const struct inet_diag_req_v2 *r = commp->r;
- struct sctp_association *assoc =
- list_entry(ep->asocs.next, struct sctp_association, asocs);
/* find the ep only once through the transports by this condition */
- if (tsp->asoc != assoc)
+ if (!list_is_first(&tsp->asoc->asocs, &ep->asocs))
return 0;
if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family)
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index ca1eba95c293..2f66a2006517 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -196,9 +196,7 @@ void sctp_transport_reset_hb_timer(struct sctp_transport *transport)
/* When a data chunk is sent, reset the heartbeat interval. */
expires = jiffies + sctp_transport_timeout(transport);
- if ((time_before(transport->hb_timer.expires, expires) ||
- !timer_pending(&transport->hb_timer)) &&
- !mod_timer(&transport->hb_timer,
+ if (!mod_timer(&transport->hb_timer,
expires + get_random_u32_below(transport->rto)))
sctp_transport_hold(transport);
}
diff --git a/net/socket.c b/net/socket.c
index 888cd618a968..c6c44e26e954 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -385,7 +385,7 @@ static const struct xattr_handler sockfs_xattr_handler = {
};
static int sockfs_security_xattr_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *dentry, struct inode *inode,
const char *suffix, const void *value,
size_t size, int flags)
@@ -589,10 +589,10 @@ static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
return used;
}
-static int sockfs_setattr(struct user_namespace *mnt_userns,
+static int sockfs_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *iattr)
{
- int err = simple_setattr(&init_user_ns, dentry, iattr);
+ int err = simple_setattr(&nop_mnt_idmap, dentry, iattr);
if (!err && (iattr->ia_valid & ATTR_UID)) {
struct socket *sock = SOCKET_I(d_inode(dentry));
@@ -971,9 +971,12 @@ static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
static void sock_recv_mark(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb)
{
- if (sock_flag(sk, SOCK_RCVMARK) && skb)
- put_cmsg(msg, SOL_SOCKET, SO_MARK, sizeof(__u32),
- &skb->mark);
+ if (sock_flag(sk, SOCK_RCVMARK) && skb) {
+ /* We must use a bounce buffer for CONFIG_HARDENED_USERCOPY=y */
+ __u32 mark = skb->mark;
+
+ put_cmsg(msg, SOL_SOCKET, SO_MARK, sizeof(__u32), &mark);
+ }
}
void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 7bb247c51e2f..2d7b1e03110a 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -302,7 +302,7 @@ __gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth
list_for_each_entry(pos, &pipe->in_downcall, list) {
if (!uid_eq(pos->uid, uid))
continue;
- if (auth && pos->auth->service != auth->service)
+ if (pos->auth->service != auth->service)
continue;
refcount_inc(&pos->count);
return pos;
@@ -686,6 +686,21 @@ out:
return err;
}
+static struct gss_upcall_msg *
+gss_find_downcall(struct rpc_pipe *pipe, kuid_t uid)
+{
+ struct gss_upcall_msg *pos;
+ list_for_each_entry(pos, &pipe->in_downcall, list) {
+ if (!uid_eq(pos->uid, uid))
+ continue;
+ if (!rpc_msg_is_inflight(&pos->msg))
+ continue;
+ refcount_inc(&pos->count);
+ return pos;
+ }
+ return NULL;
+}
+
#define MSG_BUF_MAXSIZE 1024
static ssize_t
@@ -732,7 +747,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
err = -ENOENT;
/* Find a matching upcall */
spin_lock(&pipe->lock);
- gss_msg = __gss_find_upcall(pipe, uid, NULL);
+ gss_msg = gss_find_downcall(pipe, uid);
if (gss_msg == NULL) {
spin_unlock(&pipe->lock);
goto err_put_ctx;
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 148bb0a7fa5b..acb822b23af1 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -923,7 +923,7 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
* rejecting the server-computed MIC in this somewhat rare case,
* do not use splice with the GSS integrity service.
*/
- __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
+ clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
/* Did we already verify the signature on the original pass through? */
if (rqstp->rq_deferred)
@@ -990,7 +990,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs
int pad, remaining_len, offset;
u32 rseqno;
- __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
+ clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
priv_len = svc_getnl(&buf->head[0]);
if (rqstp->rq_deferred) {
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 85f0c3cfc877..f06622814a95 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1243,10 +1243,10 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
goto err_short_len;
/* Will be turned off by GSS integrity and privacy services */
- __set_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
+ set_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
/* Will be turned off only when NFSv4 Sessions are used */
- __set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
- __clear_bit(RQ_DROPME, &rqstp->rq_flags);
+ set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
+ clear_bit(RQ_DROPME, &rqstp->rq_flags);
svc_putu32(resv, rqstp->rq_xid);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 2106003645a7..c2ce12538008 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -1238,7 +1238,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
trace_svc_defer(rqstp);
svc_xprt_get(rqstp->rq_xprt);
dr->xprt = rqstp->rq_xprt;
- __set_bit(RQ_DROPME, &rqstp->rq_flags);
+ set_bit(RQ_DROPME, &rqstp->rq_flags);
dr->handle.revisit = svc_revisit;
return &dr->handle;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 015714398007..815baf308236 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -298,9 +298,9 @@ static void svc_sock_setbufsize(struct svc_sock *svsk, unsigned int nreqs)
static void svc_sock_secure_port(struct svc_rqst *rqstp)
{
if (svc_port_is_privileged(svc_addr(rqstp)))
- __set_bit(RQ_SECURE, &rqstp->rq_flags);
+ set_bit(RQ_SECURE, &rqstp->rq_flags);
else
- __clear_bit(RQ_SECURE, &rqstp->rq_flags);
+ clear_bit(RQ_SECURE, &rqstp->rq_flags);
}
/*
@@ -1008,9 +1008,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
rqstp->rq_xprt_ctxt = NULL;
rqstp->rq_prot = IPPROTO_TCP;
if (test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags))
- __set_bit(RQ_LOCAL, &rqstp->rq_flags);
+ set_bit(RQ_LOCAL, &rqstp->rq_flags);
else
- __clear_bit(RQ_LOCAL, &rqstp->rq_flags);
+ clear_bit(RQ_LOCAL, &rqstp->rq_flags);
p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
calldir = p[1];
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 199fa012f18a..94b20fb47135 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -602,7 +602,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
static void svc_rdma_secure_port(struct svc_rqst *rqstp)
{
- __set_bit(RQ_SECURE, &rqstp->rq_flags);
+ set_bit(RQ_SECURE, &rqstp->rq_flags);
}
static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt)
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 49ddc484c4fe..5e000fde8067 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1179,8 +1179,9 @@ void tipc_node_check_dest(struct net *net, u32 addr,
bool addr_match = false;
bool sign_match = false;
bool link_up = false;
+ bool link_is_reset = false;
bool accept_addr = false;
- bool reset = true;
+ bool reset = false;
char *if_name;
unsigned long intv;
u16 session;
@@ -1200,14 +1201,14 @@ void tipc_node_check_dest(struct net *net, u32 addr,
/* Prepare to validate requesting node's signature and media address */
l = le->link;
link_up = l && tipc_link_is_up(l);
+ link_is_reset = l && tipc_link_is_reset(l);
addr_match = l && !memcmp(&le->maddr, maddr, sizeof(*maddr));
sign_match = (signature == n->signature);
/* These three flags give us eight permutations: */
if (sign_match && addr_match && link_up) {
- /* All is fine. Do nothing. */
- reset = false;
+ /* All is fine. Ignore requests. */
/* Peer node is not a container/local namespace */
if (!n->peer_hash_mix)
n->peer_hash_mix = hash_mixes;
@@ -1232,6 +1233,7 @@ void tipc_node_check_dest(struct net *net, u32 addr,
*/
accept_addr = true;
*respond = true;
+ reset = true;
} else if (!sign_match && addr_match && link_up) {
/* Peer node rebooted. Two possibilities:
* - Delayed re-discovery; this link endpoint has already
@@ -1263,6 +1265,7 @@ void tipc_node_check_dest(struct net *net, u32 addr,
n->signature = signature;
accept_addr = true;
*respond = true;
+ reset = true;
}
if (!accept_addr)
@@ -1291,6 +1294,7 @@ void tipc_node_check_dest(struct net *net, u32 addr,
tipc_link_fsm_evt(l, LINK_RESET_EVT);
if (n->state == NODE_FAILINGOVER)
tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT);
+ link_is_reset = tipc_link_is_reset(l);
le->link = l;
n->link_cnt++;
tipc_node_calculate_timer(n, l);
@@ -1303,7 +1307,7 @@ void tipc_node_check_dest(struct net *net, u32 addr,
memcpy(&le->maddr, maddr, sizeof(*maddr));
exit:
tipc_node_write_unlock(n);
- if (reset && l && !tipc_link_is_reset(l))
+ if (reset && !link_is_reset)
tipc_node_link_down(n, b->identity, false);
tipc_node_put(n);
}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index b35c8701876a..a38733f2197a 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2614,6 +2614,7 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest,
/* Send a 'SYN-' to destination */
m.msg_name = dest;
m.msg_namelen = destlen;
+ iov_iter_kvec(&m.msg_iter, ITER_SOURCE, NULL, 0, 0);
/* If connect is in non-blocking case, set MSG_DONTWAIT to
* indicate send_msg() is never blocked.
@@ -2776,6 +2777,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
__skb_queue_head(&new_sk->sk_receive_queue, buf);
skb_set_owner_r(buf, new_sk);
}
+ iov_iter_kvec(&m.msg_iter, ITER_SOURCE, NULL, 0, 0);
__tipc_sendstream(new_sock, &m, 0);
release_sock(new_sk);
exit:
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 9ed978634125..a83d2b4275fa 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -2427,7 +2427,7 @@ static bool tls_is_tx_ready(struct tls_sw_context_tx *ctx)
{
struct tls_rec *rec;
- rec = list_first_entry(&ctx->tx_list, struct tls_rec, list);
+ rec = list_first_entry_or_null(&ctx->tx_list, struct tls_rec, list);
if (!rec)
return false;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index f0c2293f1d3b..81ff98298996 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1190,7 +1190,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
unsigned int new_hash, old_hash = sk->sk_hash;
struct unix_sock *u = unix_sk(sk);
struct net *net = sock_net(sk);
- struct user_namespace *ns; // barf...
+ struct mnt_idmap *idmap;
struct unix_address *addr;
struct dentry *dentry;
struct path parent;
@@ -1217,10 +1217,10 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
/*
* All right, let's create it.
*/
- ns = mnt_user_ns(parent.mnt);
+ idmap = mnt_idmap(parent.mnt);
err = security_path_mknod(&parent, dentry, mode, 0);
if (!err)
- err = vfs_mknod(ns, d_inode(parent.dentry), dentry, mode, 0);
+ err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0);
if (err)
goto out_path;
err = mutex_lock_interruptible(&u->bindlock);
@@ -1245,7 +1245,7 @@ out_unlock:
err = -EINVAL;
out_unlink:
/* failed after successful mknod? unlink what we'd created... */
- vfs_unlink(ns, d_inode(parent.dentry), dentry, NULL);
+ vfs_unlink(idmap, d_inode(parent.dentry), dentry, NULL);
out_path:
done_path_create(&parent, dentry);
out:
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 3b55502b2965..5c7ad301d742 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -482,6 +482,12 @@ static int x25_listen(struct socket *sock, int backlog)
int rc = -EOPNOTSUPP;
lock_sock(sk);
+ if (sock->state != SS_UNCONNECTED) {
+ rc = -EINVAL;
+ release_sock(sk);
+ return rc;
+ }
+
if (sk->sk_state != TCP_LISTEN) {
memset(&x25_sk(sk)->dest_addr, 0, X25_ADDR_LEN);
sk->sk_max_ack_backlog = backlog;
diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c
index a0f62fa02e06..8cbf45a8bcdc 100644
--- a/net/xfrm/xfrm_compat.c
+++ b/net/xfrm/xfrm_compat.c
@@ -5,6 +5,7 @@
* Based on code and translator idea by: Florian Westphal <fw@strlen.de>
*/
#include <linux/compat.h>
+#include <linux/nospec.h>
#include <linux/xfrm.h>
#include <net/xfrm.h>
@@ -302,7 +303,7 @@ static int xfrm_xlate64(struct sk_buff *dst, const struct nlmsghdr *nlh_src)
nla_for_each_attr(nla, attrs, len, remaining) {
int err;
- switch (type) {
+ switch (nlh_src->nlmsg_type) {
case XFRM_MSG_NEWSPDINFO:
err = xfrm_nla_cpy(dst, nla, nla_len(nla));
break;
@@ -437,6 +438,7 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla,
NL_SET_ERR_MSG(extack, "Bad attribute");
return -EOPNOTSUPP;
}
+ type = array_index_nospec(type, XFRMA_MAX + 1);
if (nla_len(nla) < compat_policy[type].len) {
NL_SET_ERR_MSG(extack, "Attribute bad length");
return -EOPNOTSUPP;
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index c06e54a10540..436d29640ac2 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -279,8 +279,7 @@ static int xfrm6_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb)
goto out;
if (x->props.flags & XFRM_STATE_DECAP_DSCP)
- ipv6_copy_dscp(ipv6_get_dsfield(ipv6_hdr(skb)),
- ipipv6_hdr(skb));
+ ipv6_copy_dscp(XFRM_MODE_SKB_CB(skb)->tos, ipipv6_hdr(skb));
if (!(x->props.flags & XFRM_STATE_NOECN))
ipip6_ecn_decapsulate(skb);
diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c
index 1f99dc469027..35279c220bd7 100644
--- a/net/xfrm/xfrm_interface_core.c
+++ b/net/xfrm/xfrm_interface_core.c
@@ -310,6 +310,52 @@ static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet)
skb->mark = 0;
}
+static int xfrmi_input(struct sk_buff *skb, int nexthdr, __be32 spi,
+ int encap_type, unsigned short family)
+{
+ struct sec_path *sp;
+
+ sp = skb_sec_path(skb);
+ if (sp && (sp->len || sp->olen) &&
+ !xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family))
+ goto discard;
+
+ XFRM_SPI_SKB_CB(skb)->family = family;
+ if (family == AF_INET) {
+ XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
+ } else {
+ XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+ }
+
+ return xfrm_input(skb, nexthdr, spi, encap_type);
+discard:
+ kfree_skb(skb);
+ return 0;
+}
+
+static int xfrmi4_rcv(struct sk_buff *skb)
+{
+ return xfrmi_input(skb, ip_hdr(skb)->protocol, 0, 0, AF_INET);
+}
+
+static int xfrmi6_rcv(struct sk_buff *skb)
+{
+ return xfrmi_input(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
+ 0, 0, AF_INET6);
+}
+
+static int xfrmi4_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
+{
+ return xfrmi_input(skb, nexthdr, spi, encap_type, AF_INET);
+}
+
+static int xfrmi6_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
+{
+ return xfrmi_input(skb, nexthdr, spi, encap_type, AF_INET6);
+}
+
static int xfrmi_rcv_cb(struct sk_buff *skb, int err)
{
const struct xfrm_mode *inner_mode;
@@ -945,8 +991,8 @@ static struct pernet_operations xfrmi_net_ops = {
};
static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = {
- .handler = xfrm6_rcv,
- .input_handler = xfrm_input,
+ .handler = xfrmi6_rcv,
+ .input_handler = xfrmi6_input,
.cb_handler = xfrmi_rcv_cb,
.err_handler = xfrmi6_err,
.priority = 10,
@@ -996,8 +1042,8 @@ static struct xfrm6_tunnel xfrmi_ip6ip_handler __read_mostly = {
#endif
static struct xfrm4_protocol xfrmi_esp4_protocol __read_mostly = {
- .handler = xfrm4_rcv,
- .input_handler = xfrm_input,
+ .handler = xfrmi4_rcv,
+ .input_handler = xfrmi4_input,
.cb_handler = xfrmi_rcv_cb,
.err_handler = xfrmi4_err,
.priority = 10,
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index e9eb82c5457d..5c61ec04b839 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -336,7 +336,7 @@ static void xfrm_policy_timer(struct timer_list *t)
}
if (xp->lft.hard_use_expires_seconds) {
time64_t tmo = xp->lft.hard_use_expires_seconds +
- (xp->curlft.use_time ? : xp->curlft.add_time) - now;
+ (READ_ONCE(xp->curlft.use_time) ? : xp->curlft.add_time) - now;
if (tmo <= 0)
goto expired;
if (tmo < next)
@@ -354,7 +354,7 @@ static void xfrm_policy_timer(struct timer_list *t)
}
if (xp->lft.soft_use_expires_seconds) {
time64_t tmo = xp->lft.soft_use_expires_seconds +
- (xp->curlft.use_time ? : xp->curlft.add_time) - now;
+ (READ_ONCE(xp->curlft.use_time) ? : xp->curlft.add_time) - now;
if (tmo <= 0) {
warn = 1;
tmo = XFRM_KM_TIMEOUT;
@@ -3661,7 +3661,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
return 1;
}
- pol->curlft.use_time = ktime_get_real_seconds();
+ /* This lockless write can happen from different cpus. */
+ WRITE_ONCE(pol->curlft.use_time, ktime_get_real_seconds());
pols[0] = pol;
npols++;
@@ -3676,7 +3677,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
xfrm_pol_put(pols[0]);
return 0;
}
- pols[1]->curlft.use_time = ktime_get_real_seconds();
+ /* This write can happen from different cpus. */
+ WRITE_ONCE(pols[1]->curlft.use_time,
+ ktime_get_real_seconds());
npols++;
}
}
@@ -3742,6 +3745,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
goto reject;
}
+ if (if_id)
+ secpath_reset(skb);
+
xfrm_pols_put(pols, npols);
return 1;
}
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 89c731f4f0c7..00afe831c71c 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -577,7 +577,7 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
if (x->km.state == XFRM_STATE_EXPIRED)
goto expired;
if (x->lft.hard_add_expires_seconds) {
- long tmo = x->lft.hard_add_expires_seconds +
+ time64_t tmo = x->lft.hard_add_expires_seconds +
x->curlft.add_time - now;
if (tmo <= 0) {
if (x->xflags & XFRM_SOFT_EXPIRE) {
@@ -594,8 +594,8 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
next = tmo;
}
if (x->lft.hard_use_expires_seconds) {
- long tmo = x->lft.hard_use_expires_seconds +
- (x->curlft.use_time ? : now) - now;
+ time64_t tmo = x->lft.hard_use_expires_seconds +
+ (READ_ONCE(x->curlft.use_time) ? : now) - now;
if (tmo <= 0)
goto expired;
if (tmo < next)
@@ -604,7 +604,7 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
if (x->km.dying)
goto resched;
if (x->lft.soft_add_expires_seconds) {
- long tmo = x->lft.soft_add_expires_seconds +
+ time64_t tmo = x->lft.soft_add_expires_seconds +
x->curlft.add_time - now;
if (tmo <= 0) {
warn = 1;
@@ -616,8 +616,8 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
}
}
if (x->lft.soft_use_expires_seconds) {
- long tmo = x->lft.soft_use_expires_seconds +
- (x->curlft.use_time ? : now) - now;
+ time64_t tmo = x->lft.soft_use_expires_seconds +
+ (READ_ONCE(x->curlft.use_time) ? : now) - now;
if (tmo <= 0)
warn = 1;
else if (tmo < next)
@@ -1906,7 +1906,7 @@ out:
hrtimer_start(&x1->mtimer, ktime_set(1, 0),
HRTIMER_MODE_REL_SOFT);
- if (x1->curlft.use_time)
+ if (READ_ONCE(x1->curlft.use_time))
xfrm_state_check_expire(x1);
if (x->props.smark.m || x->props.smark.v || x->if_id) {
@@ -1940,8 +1940,8 @@ int xfrm_state_check_expire(struct xfrm_state *x)
{
xfrm_dev_state_update_curlft(x);
- if (!x->curlft.use_time)
- x->curlft.use_time = ktime_get_real_seconds();
+ if (!READ_ONCE(x->curlft.use_time))
+ WRITE_ONCE(x->curlft.use_time, ktime_get_real_seconds());
if (x->curlft.bytes >= x->lft.hard_byte_limit ||
x->curlft.packets >= x->lft.hard_packet_limit) {
diff --git a/rust/Makefile b/rust/Makefile
index ff70c4c916f8..8a521f2b6422 100644
--- a/rust/Makefile
+++ b/rust/Makefile
@@ -50,6 +50,7 @@ core-cfgs = \
--cfg no_fp_fmt_parse
alloc-cfgs = \
+ --cfg no_borrow \
--cfg no_fmt \
--cfg no_global_oom_handling \
--cfg no_macros \
@@ -359,8 +360,22 @@ rust-analyzer:
$(Q)$(srctree)/scripts/generate_rust_analyzer.py $(srctree) $(objtree) \
$(RUST_LIB_SRC) > $(objtree)/rust-project.json
+redirect-intrinsics = \
+ __eqsf2 __gesf2 __lesf2 __nesf2 __unordsf2 \
+ __unorddf2 \
+ __muloti4 __multi3 \
+ __udivmodti4 __udivti3 __umodti3
+
+ifneq ($(or $(CONFIG_ARM64),$(and $(CONFIG_RISCV),$(CONFIG_64BIT))),)
+ # These intrinsics are defined for ARM64 and RISCV64
+ redirect-intrinsics += \
+ __ashrti3 \
+ __ashlti3 __lshrti3
+endif
+
$(obj)/core.o: private skip_clippy = 1
$(obj)/core.o: private skip_flags = -Dunreachable_pub
+$(obj)/core.o: private rustc_objcopy = $(foreach sym,$(redirect-intrinsics),--redefine-sym $(sym)=__rust$(sym))
$(obj)/core.o: private rustc_target_flags = $(core-cfgs)
$(obj)/core.o: $(RUST_LIB_SRC)/core/src/lib.rs $(obj)/target.json FORCE
$(call if_changed_dep,rustc_library)
diff --git a/rust/alloc/borrow.rs b/rust/alloc/borrow.rs
deleted file mode 100644
index dde4957200d4..000000000000
--- a/rust/alloc/borrow.rs
+++ /dev/null
@@ -1,498 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0 OR MIT
-
-//! A module for working with borrowed data.
-
-#![stable(feature = "rust1", since = "1.0.0")]
-
-use core::cmp::Ordering;
-use core::hash::{Hash, Hasher};
-use core::ops::Deref;
-#[cfg(not(no_global_oom_handling))]
-use core::ops::{Add, AddAssign};
-
-#[stable(feature = "rust1", since = "1.0.0")]
-pub use core::borrow::{Borrow, BorrowMut};
-
-use core::fmt;
-#[cfg(not(no_global_oom_handling))]
-use crate::string::String;
-
-use Cow::*;
-
-#[stable(feature = "rust1", since = "1.0.0")]
-impl<'a, B: ?Sized> Borrow<B> for Cow<'a, B>
-where
- B: ToOwned,
- <B as ToOwned>::Owned: 'a,
-{
- fn borrow(&self) -> &B {
- &**self
- }
-}
-
-/// A generalization of `Clone` to borrowed data.
-///
-/// Some types make it possible to go from borrowed to owned, usually by
-/// implementing the `Clone` trait. But `Clone` works only for going from `&T`
-/// to `T`. The `ToOwned` trait generalizes `Clone` to construct owned data
-/// from any borrow of a given type.
-#[cfg_attr(not(test), rustc_diagnostic_item = "ToOwned")]
-#[stable(feature = "rust1", since = "1.0.0")]
-pub trait ToOwned {
- /// The resulting type after obtaining ownership.
- #[stable(feature = "rust1", since = "1.0.0")]
- type Owned: Borrow<Self>;
-
- /// Creates owned data from borrowed data, usually by cloning.
- ///
- /// # Examples
- ///
- /// Basic usage:
- ///
- /// ```
- /// let s: &str = "a";
- /// let ss: String = s.to_owned();
- ///
- /// let v: &[i32] = &[1, 2];
- /// let vv: Vec<i32> = v.to_owned();
- /// ```
- #[stable(feature = "rust1", since = "1.0.0")]
- #[must_use = "cloning is often expensive and is not expected to have side effects"]
- fn to_owned(&self) -> Self::Owned;
-
- /// Uses borrowed data to replace owned data, usually by cloning.
- ///
- /// This is borrow-generalized version of `Clone::clone_from`.
- ///
- /// # Examples
- ///
- /// Basic usage:
- ///
- /// ```
- /// # #![feature(toowned_clone_into)]
- /// let mut s: String = String::new();
- /// "hello".clone_into(&mut s);
- ///
- /// let mut v: Vec<i32> = Vec::new();
- /// [1, 2][..].clone_into(&mut v);
- /// ```
- #[unstable(feature = "toowned_clone_into", reason = "recently added", issue = "41263")]
- fn clone_into(&self, target: &mut Self::Owned) {
- *target = self.to_owned();
- }
-}
-
-#[stable(feature = "rust1", since = "1.0.0")]
-impl<T> ToOwned for T
-where
- T: Clone,
-{
- type Owned = T;
- fn to_owned(&self) -> T {
- self.clone()
- }
-
- fn clone_into(&self, target: &mut T) {
- target.clone_from(self);
- }
-}
-
-/// A clone-on-write smart pointer.
-///
-/// The type `Cow` is a smart pointer providing clone-on-write functionality: it
-/// can enclose and provide immutable access to borrowed data, and clone the
-/// data lazily when mutation or ownership is required. The type is designed to
-/// work with general borrowed data via the `Borrow` trait.
-///
-/// `Cow` implements `Deref`, which means that you can call
-/// non-mutating methods directly on the data it encloses. If mutation
-/// is desired, `to_mut` will obtain a mutable reference to an owned
-/// value, cloning if necessary.
-///
-/// If you need reference-counting pointers, note that
-/// [`Rc::make_mut`][crate::rc::Rc::make_mut] and
-/// [`Arc::make_mut`][crate::sync::Arc::make_mut] can provide clone-on-write
-/// functionality as well.
-///
-/// # Examples
-///
-/// ```
-/// use std::borrow::Cow;
-///
-/// fn abs_all(input: &mut Cow<[i32]>) {
-/// for i in 0..input.len() {
-/// let v = input[i];
-/// if v < 0 {
-/// // Clones into a vector if not already owned.
-/// input.to_mut()[i] = -v;
-/// }
-/// }
-/// }
-///
-/// // No clone occurs because `input` doesn't need to be mutated.
-/// let slice = [0, 1, 2];
-/// let mut input = Cow::from(&slice[..]);
-/// abs_all(&mut input);
-///
-/// // Clone occurs because `input` needs to be mutated.
-/// let slice = [-1, 0, 1];
-/// let mut input = Cow::from(&slice[..]);
-/// abs_all(&mut input);
-///
-/// // No clone occurs because `input` is already owned.
-/// let mut input = Cow::from(vec![-1, 0, 1]);
-/// abs_all(&mut input);
-/// ```
-///
-/// Another example showing how to keep `Cow` in a struct:
-///
-/// ```
-/// use std::borrow::Cow;
-///
-/// struct Items<'a, X: 'a> where [X]: ToOwned<Owned = Vec<X>> {
-/// values: Cow<'a, [X]>,
-/// }
-///
-/// impl<'a, X: Clone + 'a> Items<'a, X> where [X]: ToOwned<Owned = Vec<X>> {
-/// fn new(v: Cow<'a, [X]>) -> Self {
-/// Items { values: v }
-/// }
-/// }
-///
-/// // Creates a container from borrowed values of a slice
-/// let readonly = [1, 2];
-/// let borrowed = Items::new((&readonly[..]).into());
-/// match borrowed {
-/// Items { values: Cow::Borrowed(b) } => println!("borrowed {b:?}"),
-/// _ => panic!("expect borrowed value"),
-/// }
-///
-/// let mut clone_on_write = borrowed;
-/// // Mutates the data from slice into owned vec and pushes a new value on top
-/// clone_on_write.values.to_mut().push(3);
-/// println!("clone_on_write = {:?}", clone_on_write.values);
-///
-/// // The data was mutated. Let's check it out.
-/// match clone_on_write {
-/// Items { values: Cow::Owned(_) } => println!("clone_on_write contains owned data"),
-/// _ => panic!("expect owned data"),
-/// }
-/// ```
-#[stable(feature = "rust1", since = "1.0.0")]
-#[cfg_attr(not(test), rustc_diagnostic_item = "Cow")]
-pub enum Cow<'a, B: ?Sized + 'a>
-where
- B: ToOwned,
-{
- /// Borrowed data.
- #[stable(feature = "rust1", since = "1.0.0")]
- Borrowed(#[stable(feature = "rust1", since = "1.0.0")] &'a B),
-
- /// Owned data.
- #[stable(feature = "rust1", since = "1.0.0")]
- Owned(#[stable(feature = "rust1", since = "1.0.0")] <B as ToOwned>::Owned),
-}
-
-#[stable(feature = "rust1", since = "1.0.0")]
-impl<B: ?Sized + ToOwned> Clone for Cow<'_, B> {
- fn clone(&self) -> Self {
- match *self {
- Borrowed(b) => Borrowed(b),
- Owned(ref o) => {
- let b: &B = o.borrow();
- Owned(b.to_owned())
- }
- }
- }
-
- fn clone_from(&mut self, source: &Self) {
- match (self, source) {
- (&mut Owned(ref mut dest), &Owned(ref o)) => o.borrow().clone_into(dest),
- (t, s) => *t = s.clone(),
- }
- }
-}
-
-impl<B: ?Sized + ToOwned> Cow<'_, B> {
- /// Returns true if the data is borrowed, i.e. if `to_mut` would require additional work.
- ///
- /// # Examples
- ///
- /// ```
- /// #![feature(cow_is_borrowed)]
- /// use std::borrow::Cow;
- ///
- /// let cow = Cow::Borrowed("moo");
- /// assert!(cow.is_borrowed());
- ///
- /// let bull: Cow<'_, str> = Cow::Owned("...moo?".to_string());
- /// assert!(!bull.is_borrowed());
- /// ```
- #[unstable(feature = "cow_is_borrowed", issue = "65143")]
- #[rustc_const_unstable(feature = "const_cow_is_borrowed", issue = "65143")]
- pub const fn is_borrowed(&self) -> bool {
- match *self {
- Borrowed(_) => true,
- Owned(_) => false,
- }
- }
-
- /// Returns true if the data is owned, i.e. if `to_mut` would be a no-op.
- ///
- /// # Examples
- ///
- /// ```
- /// #![feature(cow_is_borrowed)]
- /// use std::borrow::Cow;
- ///
- /// let cow: Cow<'_, str> = Cow::Owned("moo".to_string());
- /// assert!(cow.is_owned());
- ///
- /// let bull = Cow::Borrowed("...moo?");
- /// assert!(!bull.is_owned());
- /// ```
- #[unstable(feature = "cow_is_borrowed", issue = "65143")]
- #[rustc_const_unstable(feature = "const_cow_is_borrowed", issue = "65143")]
- pub const fn is_owned(&self) -> bool {
- !self.is_borrowed()
- }
-
- /// Acquires a mutable reference to the owned form of the data.
- ///
- /// Clones the data if it is not already owned.
- ///
- /// # Examples
- ///
- /// ```
- /// use std::borrow::Cow;
- ///
- /// let mut cow = Cow::Borrowed("foo");
- /// cow.to_mut().make_ascii_uppercase();
- ///
- /// assert_eq!(
- /// cow,
- /// Cow::Owned(String::from("FOO")) as Cow<str>
- /// );
- /// ```
- #[stable(feature = "rust1", since = "1.0.0")]
- pub fn to_mut(&mut self) -> &mut <B as ToOwned>::Owned {
- match *self {
- Borrowed(borrowed) => {
- *self = Owned(borrowed.to_owned());
- match *self {
- Borrowed(..) => unreachable!(),
- Owned(ref mut owned) => owned,
- }
- }
- Owned(ref mut owned) => owned,
- }
- }
-
- /// Extracts the owned data.
- ///
- /// Clones the data if it is not already owned.
- ///
- /// # Examples
- ///
- /// Calling `into_owned` on a `Cow::Borrowed` returns a clone of the borrowed data:
- ///
- /// ```
- /// use std::borrow::Cow;
- ///
- /// let s = "Hello world!";
- /// let cow = Cow::Borrowed(s);
- ///
- /// assert_eq!(
- /// cow.into_owned(),
- /// String::from(s)
- /// );
- /// ```
- ///
- /// Calling `into_owned` on a `Cow::Owned` returns the owned data. The data is moved out of the
- /// `Cow` without being cloned.
- ///
- /// ```
- /// use std::borrow::Cow;
- ///
- /// let s = "Hello world!";
- /// let cow: Cow<str> = Cow::Owned(String::from(s));
- ///
- /// assert_eq!(
- /// cow.into_owned(),
- /// String::from(s)
- /// );
- /// ```
- #[stable(feature = "rust1", since = "1.0.0")]
- pub fn into_owned(self) -> <B as ToOwned>::Owned {
- match self {
- Borrowed(borrowed) => borrowed.to_owned(),
- Owned(owned) => owned,
- }
- }
-}
-
-#[stable(feature = "rust1", since = "1.0.0")]
-#[rustc_const_unstable(feature = "const_deref", issue = "88955")]
-impl<B: ?Sized + ToOwned> const Deref for Cow<'_, B>
-where
- B::Owned: ~const Borrow<B>,
-{
- type Target = B;
-
- fn deref(&self) -> &B {
- match *self {
- Borrowed(borrowed) => borrowed,
- Owned(ref owned) => owned.borrow(),
- }
- }
-}
-
-#[stable(feature = "rust1", since = "1.0.0")]
-impl<B: ?Sized> Eq for Cow<'_, B> where B: Eq + ToOwned {}
-
-#[stable(feature = "rust1", since = "1.0.0")]
-impl<B: ?Sized> Ord for Cow<'_, B>
-where
- B: Ord + ToOwned,
-{
- #[inline]
- fn cmp(&self, other: &Self) -> Ordering {
- Ord::cmp(&**self, &**other)
- }
-}
-
-#[stable(feature = "rust1", since = "1.0.0")]
-impl<'a, 'b, B: ?Sized, C: ?Sized> PartialEq<Cow<'b, C>> for Cow<'a, B>
-where
- B: PartialEq<C> + ToOwned,
- C: ToOwned,
-{
- #[inline]
- fn eq(&self, other: &Cow<'b, C>) -> bool {
- PartialEq::eq(&**self, &**other)
- }
-}
-
-#[stable(feature = "rust1", since = "1.0.0")]
-impl<'a, B: ?Sized> PartialOrd for Cow<'a, B>
-where
- B: PartialOrd + ToOwned,
-{
- #[inline]
- fn partial_cmp(&self, other: &Cow<'a, B>) -> Option<Ordering> {
- PartialOrd::partial_cmp(&**self, &**other)
- }
-}
-
-#[stable(feature = "rust1", since = "1.0.0")]
-impl<B: ?Sized> fmt::Debug for Cow<'_, B>
-where
- B: fmt::Debug + ToOwned<Owned: fmt::Debug>,
-{
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- match *self {
- Borrowed(ref b) => fmt::Debug::fmt(b, f),
- Owned(ref o) => fmt::Debug::fmt(o, f),
- }
- }
-}
-
-#[stable(feature = "rust1", since = "1.0.0")]
-impl<B: ?Sized> fmt::Display for Cow<'_, B>
-where
- B: fmt::Display + ToOwned<Owned: fmt::Display>,
-{
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- match *self {
- Borrowed(ref b) => fmt::Display::fmt(b, f),
- Owned(ref o) => fmt::Display::fmt(o, f),
- }
- }
-}
-
-#[stable(feature = "default", since = "1.11.0")]
-impl<B: ?Sized> Default for Cow<'_, B>
-where
- B: ToOwned<Owned: Default>,
-{
- /// Creates an owned Cow<'a, B> with the default value for the contained owned value.
- fn default() -> Self {
- Owned(<B as ToOwned>::Owned::default())
- }
-}
-
-#[stable(feature = "rust1", since = "1.0.0")]
-impl<B: ?Sized> Hash for Cow<'_, B>
-where
- B: Hash + ToOwned,
-{
- #[inline]
- fn hash<H: Hasher>(&self, state: &mut H) {
- Hash::hash(&**self, state)
- }
-}
-
-#[stable(feature = "rust1", since = "1.0.0")]
-impl<T: ?Sized + ToOwned> AsRef<T> for Cow<'_, T> {
- fn as_ref(&self) -> &T {
- self
- }
-}
-
-#[cfg(not(no_global_oom_handling))]
-#[stable(feature = "cow_add", since = "1.14.0")]
-impl<'a> Add<&'a str> for Cow<'a, str> {
- type Output = Cow<'a, str>;
-
- #[inline]
- fn add(mut self, rhs: &'a str) -> Self::Output {
- self += rhs;
- self
- }
-}
-
-#[cfg(not(no_global_oom_handling))]
-#[stable(feature = "cow_add", since = "1.14.0")]
-impl<'a> Add<Cow<'a, str>> for Cow<'a, str> {
- type Output = Cow<'a, str>;
-
- #[inline]
- fn add(mut self, rhs: Cow<'a, str>) -> Self::Output {
- self += rhs;
- self
- }
-}
-
-#[cfg(not(no_global_oom_handling))]
-#[stable(feature = "cow_add", since = "1.14.0")]
-impl<'a> AddAssign<&'a str> for Cow<'a, str> {
- fn add_assign(&mut self, rhs: &'a str) {
- if self.is_empty() {
- *self = Cow::Borrowed(rhs)
- } else if !rhs.is_empty() {
- if let Cow::Borrowed(lhs) = *self {
- let mut s = String::with_capacity(lhs.len() + rhs.len());
- s.push_str(lhs);
- *self = Cow::Owned(s);
- }
- self.to_mut().push_str(rhs);
- }
- }
-}
-
-#[cfg(not(no_global_oom_handling))]
-#[stable(feature = "cow_add", since = "1.14.0")]
-impl<'a> AddAssign<Cow<'a, str>> for Cow<'a, str> {
- fn add_assign(&mut self, rhs: Cow<'a, str>) {
- if self.is_empty() {
- *self = rhs
- } else if !rhs.is_empty() {
- if let Cow::Borrowed(lhs) = *self {
- let mut s = String::with_capacity(lhs.len() + rhs.len());
- s.push_str(lhs);
- *self = Cow::Owned(s);
- }
- self.to_mut().push_str(&rhs);
- }
- }
-}
diff --git a/rust/alloc/lib.rs b/rust/alloc/lib.rs
index 233bcd5e4654..3aebf83c9967 100644
--- a/rust/alloc/lib.rs
+++ b/rust/alloc/lib.rs
@@ -100,7 +100,7 @@
#![cfg_attr(not(no_global_oom_handling), feature(const_alloc_error))]
#![feature(const_box)]
#![cfg_attr(not(no_global_oom_handling), feature(const_btree_new))]
-#![feature(const_cow_is_borrowed)]
+#![cfg_attr(not(no_borrow), feature(const_cow_is_borrowed))]
#![feature(const_convert)]
#![feature(const_size_of_val)]
#![feature(const_align_of_val)]
@@ -215,6 +215,7 @@ pub mod boxed;
mod boxed {
pub use std::boxed::Box;
}
+#[cfg(not(no_borrow))]
pub mod borrow;
pub mod collections;
#[cfg(not(no_global_oom_handling))]
diff --git a/rust/alloc/vec/mod.rs b/rust/alloc/vec/mod.rs
index 8ac6c1e3b2a8..f77c7368d534 100644
--- a/rust/alloc/vec/mod.rs
+++ b/rust/alloc/vec/mod.rs
@@ -72,6 +72,7 @@ use core::ptr::{self, NonNull};
use core::slice::{self, SliceIndex};
use crate::alloc::{Allocator, Global};
+#[cfg(not(no_borrow))]
use crate::borrow::{Cow, ToOwned};
use crate::boxed::Box;
use crate::collections::TryReserveError;
@@ -94,6 +95,7 @@ pub use self::drain::Drain;
mod drain;
+#[cfg(not(no_borrow))]
#[cfg(not(no_global_oom_handling))]
mod cow;
@@ -3103,6 +3105,7 @@ impl<T, const N: usize> From<[T; N]> for Vec<T> {
}
}
+#[cfg(not(no_borrow))]
#[stable(feature = "vec_from_cow_slice", since = "1.14.0")]
impl<'a, T> From<Cow<'a, [T]>> for Vec<T>
where
diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h
index c48bc284214a..75d85bd6c592 100644
--- a/rust/bindings/bindings_helper.h
+++ b/rust/bindings/bindings_helper.h
@@ -7,6 +7,7 @@
*/
#include <linux/slab.h>
+#include <linux/refcount.h>
/* `bindgen` gets confused at certain things. */
const gfp_t BINDINGS_GFP_KERNEL = GFP_KERNEL;
diff --git a/rust/bindings/lib.rs b/rust/bindings/lib.rs
index 6c50ee62c56b..7b246454e009 100644
--- a/rust/bindings/lib.rs
+++ b/rust/bindings/lib.rs
@@ -41,6 +41,7 @@ mod bindings_raw {
#[allow(dead_code)]
mod bindings_helper {
// Import the generated bindings for types.
+ use super::bindings_raw::*;
include!(concat!(
env!("OBJTREE"),
"/rust/bindings/bindings_helpers_generated.rs"
diff --git a/rust/compiler_builtins.rs b/rust/compiler_builtins.rs
index f8f39a3e6855..43378357ece9 100644
--- a/rust/compiler_builtins.rs
+++ b/rust/compiler_builtins.rs
@@ -28,7 +28,7 @@ macro_rules! define_panicking_intrinsics(
($reason: tt, { $($ident: ident, )* }) => {
$(
#[doc(hidden)]
- #[no_mangle]
+ #[export_name = concat!("__rust", stringify!($ident))]
pub extern "C" fn $ident() {
panic!($reason);
}
@@ -61,3 +61,6 @@ define_panicking_intrinsics!("`u128` should not be used", {
__udivti3,
__umodti3,
});
+
+// NOTE: if you are adding a new intrinsic here, you should also add it to
+// `redirect-intrinsics` in `rust/Makefile`.
diff --git a/rust/helpers.c b/rust/helpers.c
index b4f15eee2ffd..09a4d93f9d62 100644
--- a/rust/helpers.c
+++ b/rust/helpers.c
@@ -20,6 +20,7 @@
#include <linux/bug.h>
#include <linux/build_bug.h>
+#include <linux/refcount.h>
__noreturn void rust_helper_BUG(void)
{
@@ -27,6 +28,24 @@ __noreturn void rust_helper_BUG(void)
}
EXPORT_SYMBOL_GPL(rust_helper_BUG);
+refcount_t rust_helper_REFCOUNT_INIT(int n)
+{
+ return (refcount_t)REFCOUNT_INIT(n);
+}
+EXPORT_SYMBOL_GPL(rust_helper_REFCOUNT_INIT);
+
+void rust_helper_refcount_inc(refcount_t *r)
+{
+ refcount_inc(r);
+}
+EXPORT_SYMBOL_GPL(rust_helper_refcount_inc);
+
+bool rust_helper_refcount_dec_and_test(refcount_t *r)
+{
+ return refcount_dec_and_test(r);
+}
+EXPORT_SYMBOL_GPL(rust_helper_refcount_dec_and_test);
+
/*
* We use `bindgen`'s `--size_t-is-usize` option to bind the C `size_t` type
* as the Rust `usize` type, so we can use it in contexts where Rust
diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs
index 53040fa9e897..223564f9f0cc 100644
--- a/rust/kernel/lib.rs
+++ b/rust/kernel/lib.rs
@@ -13,7 +13,12 @@
#![no_std]
#![feature(allocator_api)]
+#![feature(coerce_unsized)]
#![feature(core_ffi_c)]
+#![feature(dispatch_from_dyn)]
+#![feature(generic_associated_types)]
+#![feature(receiver_trait)]
+#![feature(unsize)]
// Ensure conditional compilation based on the kernel configuration works;
// otherwise we may silently break things like initcall handling.
@@ -31,6 +36,7 @@ mod static_assert;
#[doc(hidden)]
pub mod std_vendor;
pub mod str;
+pub mod sync;
pub mod types;
#[doc(hidden)]
diff --git a/rust/kernel/prelude.rs b/rust/kernel/prelude.rs
index 7a90249ee9b9..0bc1c97e5604 100644
--- a/rust/kernel/prelude.rs
+++ b/rust/kernel/prelude.rs
@@ -11,15 +11,21 @@
//! use kernel::prelude::*;
//! ```
+#[doc(no_inline)]
pub use core::pin::Pin;
+#[doc(no_inline)]
pub use alloc::{boxed::Box, vec::Vec};
+#[doc(no_inline)]
pub use macros::{module, vtable};
pub use super::build_assert;
-pub use super::{dbg, pr_alert, pr_crit, pr_debug, pr_emerg, pr_err, pr_info, pr_notice, pr_warn};
+// `super::std_vendor` is hidden, which makes the macro inline for some reason.
+#[doc(no_inline)]
+pub use super::dbg;
+pub use super::{pr_alert, pr_crit, pr_debug, pr_emerg, pr_err, pr_info, pr_notice, pr_warn};
pub use super::static_assert;
diff --git a/rust/kernel/print.rs b/rust/kernel/print.rs
index 29bf9c2e8aee..30103325696d 100644
--- a/rust/kernel/print.rs
+++ b/rust/kernel/print.rs
@@ -142,17 +142,24 @@ pub fn call_printk_cont(args: fmt::Arguments<'_>) {
macro_rules! print_macro (
// The non-continuation cases (most of them, e.g. `INFO`).
($format_string:path, false, $($arg:tt)+) => (
- // SAFETY: This hidden macro should only be called by the documented
- // printing macros which ensure the format string is one of the fixed
- // ones. All `__LOG_PREFIX`s are null-terminated as they are generated
- // by the `module!` proc macro or fixed values defined in a kernel
- // crate.
- unsafe {
- $crate::print::call_printk(
- &$format_string,
- crate::__LOG_PREFIX,
- format_args!($($arg)+),
- );
+ // To remain sound, `arg`s must be expanded outside the `unsafe` block.
+ // Typically one would use a `let` binding for that; however, `format_args!`
+ // takes borrows on the arguments, but does not extend the scope of temporaries.
+ // Therefore, a `match` expression is used to keep them around, since
+ // the scrutinee is kept until the end of the `match`.
+ match format_args!($($arg)+) {
+ // SAFETY: This hidden macro should only be called by the documented
+ // printing macros which ensure the format string is one of the fixed
+ // ones. All `__LOG_PREFIX`s are null-terminated as they are generated
+ // by the `module!` proc macro or fixed values defined in a kernel
+ // crate.
+ args => unsafe {
+ $crate::print::call_printk(
+ &$format_string,
+ crate::__LOG_PREFIX,
+ args,
+ );
+ }
}
);
diff --git a/rust/kernel/sync.rs b/rust/kernel/sync.rs
new file mode 100644
index 000000000000..33da23e3076d
--- /dev/null
+++ b/rust/kernel/sync.rs
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Synchronisation primitives.
+//!
+//! This module contains the kernel APIs related to synchronisation that have been ported or
+//! wrapped for usage by Rust code in the kernel.
+
+mod arc;
+
+pub use arc::{Arc, ArcBorrow, UniqueArc};
diff --git a/rust/kernel/sync/arc.rs b/rust/kernel/sync/arc.rs
new file mode 100644
index 000000000000..f2f1c83d72ba
--- /dev/null
+++ b/rust/kernel/sync/arc.rs
@@ -0,0 +1,524 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! A reference-counted pointer.
+//!
+//! This module implements a way for users to create reference-counted objects and pointers to
+//! them. Such a pointer automatically increments and decrements the count, and drops the
+//! underlying object when it reaches zero. It is also safe to use concurrently from multiple
+//! threads.
+//!
+//! It is different from the standard library's [`Arc`] in a few ways:
+//! 1. It is backed by the kernel's `refcount_t` type.
+//! 2. It does not support weak references, which allows it to be half the size.
+//! 3. It saturates the reference count instead of aborting when it goes over a threshold.
+//! 4. It does not provide a `get_mut` method, so the ref counted object is pinned.
+//!
+//! [`Arc`]: https://doc.rust-lang.org/std/sync/struct.Arc.html
+
+use crate::{
+ bindings,
+ error::Result,
+ types::{ForeignOwnable, Opaque},
+};
+use alloc::boxed::Box;
+use core::{
+ marker::{PhantomData, Unsize},
+ mem::{ManuallyDrop, MaybeUninit},
+ ops::{Deref, DerefMut},
+ pin::Pin,
+ ptr::NonNull,
+};
+
+/// A reference-counted pointer to an instance of `T`.
+///
+/// The reference count is incremented when new instances of [`Arc`] are created, and decremented
+/// when they are dropped. When the count reaches zero, the underlying `T` is also dropped.
+///
+/// # Invariants
+///
+/// The reference count on an instance of [`Arc`] is always non-zero.
+/// The object pointed to by [`Arc`] is always pinned.
+///
+/// # Examples
+///
+/// ```
+/// use kernel::sync::Arc;
+///
+/// struct Example {
+/// a: u32,
+/// b: u32,
+/// }
+///
+/// // Create a ref-counted instance of `Example`.
+/// let obj = Arc::try_new(Example { a: 10, b: 20 })?;
+///
+/// // Get a new pointer to `obj` and increment the refcount.
+/// let cloned = obj.clone();
+///
+/// // Assert that both `obj` and `cloned` point to the same underlying object.
+/// assert!(core::ptr::eq(&*obj, &*cloned));
+///
+/// // Destroy `obj` and decrement its refcount.
+/// drop(obj);
+///
+/// // Check that the values are still accessible through `cloned`.
+/// assert_eq!(cloned.a, 10);
+/// assert_eq!(cloned.b, 20);
+///
+/// // The refcount drops to zero when `cloned` goes out of scope, and the memory is freed.
+/// ```
+///
+/// Using `Arc<T>` as the type of `self`:
+///
+/// ```
+/// use kernel::sync::Arc;
+///
+/// struct Example {
+/// a: u32,
+/// b: u32,
+/// }
+///
+/// impl Example {
+/// fn take_over(self: Arc<Self>) {
+/// // ...
+/// }
+///
+/// fn use_reference(self: &Arc<Self>) {
+/// // ...
+/// }
+/// }
+///
+/// let obj = Arc::try_new(Example { a: 10, b: 20 })?;
+/// obj.use_reference();
+/// obj.take_over();
+/// ```
+///
+/// Coercion from `Arc<Example>` to `Arc<dyn MyTrait>`:
+///
+/// ```
+/// use kernel::sync::{Arc, ArcBorrow};
+///
+/// trait MyTrait {
+/// // Trait has a function whose `self` type is `Arc<Self>`.
+/// fn example1(self: Arc<Self>) {}
+///
+/// // Trait has a function whose `self` type is `ArcBorrow<'_, Self>`.
+/// fn example2(self: ArcBorrow<'_, Self>) {}
+/// }
+///
+/// struct Example;
+/// impl MyTrait for Example {}
+///
+/// // `obj` has type `Arc<Example>`.
+/// let obj: Arc<Example> = Arc::try_new(Example)?;
+///
+/// // `coerced` has type `Arc<dyn MyTrait>`.
+/// let coerced: Arc<dyn MyTrait> = obj;
+/// ```
+pub struct Arc<T: ?Sized> {
+ ptr: NonNull<ArcInner<T>>,
+ _p: PhantomData<ArcInner<T>>,
+}
+
+#[repr(C)]
+struct ArcInner<T: ?Sized> {
+ refcount: Opaque<bindings::refcount_t>,
+ data: T,
+}
+
+// This is to allow [`Arc`] (and variants) to be used as the type of `self`.
+impl<T: ?Sized> core::ops::Receiver for Arc<T> {}
+
+// This is to allow coercion from `Arc<T>` to `Arc<U>` if `T` can be converted to the
+// dynamically-sized type (DST) `U`.
+impl<T: ?Sized + Unsize<U>, U: ?Sized> core::ops::CoerceUnsized<Arc<U>> for Arc<T> {}
+
+// This is to allow `Arc<U>` to be dispatched on when `Arc<T>` can be coerced into `Arc<U>`.
+impl<T: ?Sized + Unsize<U>, U: ?Sized> core::ops::DispatchFromDyn<Arc<U>> for Arc<T> {}
+
+// SAFETY: It is safe to send `Arc<T>` to another thread when the underlying `T` is `Sync` because
+// it effectively means sharing `&T` (which is safe because `T` is `Sync`); additionally, it needs
+// `T` to be `Send` because any thread that has an `Arc<T>` may ultimately access `T` directly, for
+// example, when the reference count reaches zero and `T` is dropped.
+unsafe impl<T: ?Sized + Sync + Send> Send for Arc<T> {}
+
+// SAFETY: It is safe to send `&Arc<T>` to another thread when the underlying `T` is `Sync` for the
+// same reason as above. `T` needs to be `Send` as well because a thread can clone an `&Arc<T>`
+// into an `Arc<T>`, which may lead to `T` being accessed by the same reasoning as above.
+unsafe impl<T: ?Sized + Sync + Send> Sync for Arc<T> {}
+
+impl<T> Arc<T> {
+ /// Constructs a new reference counted instance of `T`.
+ pub fn try_new(contents: T) -> Result<Self> {
+ // INVARIANT: The refcount is initialised to a non-zero value.
+ let value = ArcInner {
+ // SAFETY: There are no safety requirements for this FFI call.
+ refcount: Opaque::new(unsafe { bindings::REFCOUNT_INIT(1) }),
+ data: contents,
+ };
+
+ let inner = Box::try_new(value)?;
+
+ // SAFETY: We just created `inner` with a reference count of 1, which is owned by the new
+ // `Arc` object.
+ Ok(unsafe { Self::from_inner(Box::leak(inner).into()) })
+ }
+}
+
+impl<T: ?Sized> Arc<T> {
+ /// Constructs a new [`Arc`] from an existing [`ArcInner`].
+ ///
+ /// # Safety
+ ///
+ /// The caller must ensure that `inner` points to a valid location and has a non-zero reference
+ /// count, one of which will be owned by the new [`Arc`] instance.
+ unsafe fn from_inner(inner: NonNull<ArcInner<T>>) -> Self {
+ // INVARIANT: By the safety requirements, the invariants hold.
+ Arc {
+ ptr: inner,
+ _p: PhantomData,
+ }
+ }
+
+ /// Returns an [`ArcBorrow`] from the given [`Arc`].
+ ///
+ /// This is useful when the argument of a function call is an [`ArcBorrow`] (e.g., in a method
+ /// receiver), but we have an [`Arc`] instead. Getting an [`ArcBorrow`] is free when optimised.
+ #[inline]
+ pub fn as_arc_borrow(&self) -> ArcBorrow<'_, T> {
+ // SAFETY: The constraint that the lifetime of the shared reference must outlive that of
+ // the returned `ArcBorrow` ensures that the object remains alive and that no mutable
+ // reference can be created.
+ unsafe { ArcBorrow::new(self.ptr) }
+ }
+}
+
+impl<T: 'static> ForeignOwnable for Arc<T> {
+ type Borrowed<'a> = ArcBorrow<'a, T>;
+
+ fn into_foreign(self) -> *const core::ffi::c_void {
+ ManuallyDrop::new(self).ptr.as_ptr() as _
+ }
+
+ unsafe fn borrow<'a>(ptr: *const core::ffi::c_void) -> ArcBorrow<'a, T> {
+ // SAFETY: By the safety requirement of this function, we know that `ptr` came from
+ // a previous call to `Arc::into_foreign`.
+ let inner = NonNull::new(ptr as *mut ArcInner<T>).unwrap();
+
+ // SAFETY: The safety requirements of `from_foreign` ensure that the object remains alive
+ // for the lifetime of the returned value. Additionally, the safety requirements of
+ // `ForeignOwnable::borrow_mut` ensure that no new mutable references are created.
+ unsafe { ArcBorrow::new(inner) }
+ }
+
+ unsafe fn from_foreign(ptr: *const core::ffi::c_void) -> Self {
+ // SAFETY: By the safety requirement of this function, we know that `ptr` came from
+ // a previous call to `Arc::into_foreign`, which guarantees that `ptr` is valid and
+ // holds a reference count increment that is transferrable to us.
+ unsafe { Self::from_inner(NonNull::new(ptr as _).unwrap()) }
+ }
+}
+
+impl<T: ?Sized> Deref for Arc<T> {
+ type Target = T;
+
+ fn deref(&self) -> &Self::Target {
+ // SAFETY: By the type invariant, there is necessarily a reference to the object, so it is
+ // safe to dereference it.
+ unsafe { &self.ptr.as_ref().data }
+ }
+}
+
+impl<T: ?Sized> Clone for Arc<T> {
+ fn clone(&self) -> Self {
+ // INVARIANT: C `refcount_inc` saturates the refcount, so it cannot overflow to zero.
+ // SAFETY: By the type invariant, there is necessarily a reference to the object, so it is
+ // safe to increment the refcount.
+ unsafe { bindings::refcount_inc(self.ptr.as_ref().refcount.get()) };
+
+ // SAFETY: We just incremented the refcount. This increment is now owned by the new `Arc`.
+ unsafe { Self::from_inner(self.ptr) }
+ }
+}
+
+impl<T: ?Sized> Drop for Arc<T> {
+ fn drop(&mut self) {
+ // SAFETY: By the type invariant, there is necessarily a reference to the object. We cannot
+ // touch `refcount` after it's decremented to a non-zero value because another thread/CPU
+ // may concurrently decrement it to zero and free it. It is ok to have a raw pointer to
+ // freed/invalid memory as long as it is never dereferenced.
+ let refcount = unsafe { self.ptr.as_ref() }.refcount.get();
+
+ // INVARIANT: If the refcount reaches zero, there are no other instances of `Arc`, and
+ // this instance is being dropped, so the broken invariant is not observable.
+ // SAFETY: Also by the type invariant, we are allowed to decrement the refcount.
+ let is_zero = unsafe { bindings::refcount_dec_and_test(refcount) };
+ if is_zero {
+ // The count reached zero, we must free the memory.
+ //
+ // SAFETY: The pointer was initialised from the result of `Box::leak`.
+ unsafe { Box::from_raw(self.ptr.as_ptr()) };
+ }
+ }
+}
+
+impl<T: ?Sized> From<UniqueArc<T>> for Arc<T> {
+ fn from(item: UniqueArc<T>) -> Self {
+ item.inner
+ }
+}
+
+impl<T: ?Sized> From<Pin<UniqueArc<T>>> for Arc<T> {
+ fn from(item: Pin<UniqueArc<T>>) -> Self {
+ // SAFETY: The type invariants of `Arc` guarantee that the data is pinned.
+ unsafe { Pin::into_inner_unchecked(item).inner }
+ }
+}
+
+/// A borrowed reference to an [`Arc`] instance.
+///
+/// For cases when one doesn't ever need to increment the refcount on the allocation, it is simpler
+/// to use just `&T`, which we can trivially get from an `Arc<T>` instance.
+///
+/// However, when one may need to increment the refcount, it is preferable to use an `ArcBorrow<T>`
+/// over `&Arc<T>` because the latter results in a double-indirection: a pointer (shared reference)
+/// to a pointer (`Arc<T>`) to the object (`T`). An [`ArcBorrow`] eliminates this double
+/// indirection while still allowing one to increment the refcount and getting an `Arc<T>` when/if
+/// needed.
+///
+/// # Invariants
+///
+/// There are no mutable references to the underlying [`Arc`], and it remains valid for the
+/// lifetime of the [`ArcBorrow`] instance.
+///
+/// # Example
+///
+/// ```
+/// use crate::sync::{Arc, ArcBorrow};
+///
+/// struct Example;
+///
+/// fn do_something(e: ArcBorrow<'_, Example>) -> Arc<Example> {
+/// e.into()
+/// }
+///
+/// let obj = Arc::try_new(Example)?;
+/// let cloned = do_something(obj.as_arc_borrow());
+///
+/// // Assert that both `obj` and `cloned` point to the same underlying object.
+/// assert!(core::ptr::eq(&*obj, &*cloned));
+/// ```
+///
+/// Using `ArcBorrow<T>` as the type of `self`:
+///
+/// ```
+/// use crate::sync::{Arc, ArcBorrow};
+///
+/// struct Example {
+/// a: u32,
+/// b: u32,
+/// }
+///
+/// impl Example {
+/// fn use_reference(self: ArcBorrow<'_, Self>) {
+/// // ...
+/// }
+/// }
+///
+/// let obj = Arc::try_new(Example { a: 10, b: 20 })?;
+/// obj.as_arc_borrow().use_reference();
+/// ```
+pub struct ArcBorrow<'a, T: ?Sized + 'a> {
+ inner: NonNull<ArcInner<T>>,
+ _p: PhantomData<&'a ()>,
+}
+
+// This is to allow [`ArcBorrow`] (and variants) to be used as the type of `self`.
+impl<T: ?Sized> core::ops::Receiver for ArcBorrow<'_, T> {}
+
+// This is to allow `ArcBorrow<U>` to be dispatched on when `ArcBorrow<T>` can be coerced into
+// `ArcBorrow<U>`.
+impl<T: ?Sized + Unsize<U>, U: ?Sized> core::ops::DispatchFromDyn<ArcBorrow<'_, U>>
+ for ArcBorrow<'_, T>
+{
+}
+
+impl<T: ?Sized> Clone for ArcBorrow<'_, T> {
+ fn clone(&self) -> Self {
+ *self
+ }
+}
+
+impl<T: ?Sized> Copy for ArcBorrow<'_, T> {}
+
+impl<T: ?Sized> ArcBorrow<'_, T> {
+ /// Creates a new [`ArcBorrow`] instance.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure the following for the lifetime of the returned [`ArcBorrow`] instance:
+ /// 1. That `inner` remains valid;
+ /// 2. That no mutable references to `inner` are created.
+ unsafe fn new(inner: NonNull<ArcInner<T>>) -> Self {
+ // INVARIANT: The safety requirements guarantee the invariants.
+ Self {
+ inner,
+ _p: PhantomData,
+ }
+ }
+}
+
+impl<T: ?Sized> From<ArcBorrow<'_, T>> for Arc<T> {
+ fn from(b: ArcBorrow<'_, T>) -> Self {
+ // SAFETY: The existence of `b` guarantees that the refcount is non-zero. `ManuallyDrop`
+ // guarantees that `drop` isn't called, so it's ok that the temporary `Arc` doesn't own the
+ // increment.
+ ManuallyDrop::new(unsafe { Arc::from_inner(b.inner) })
+ .deref()
+ .clone()
+ }
+}
+
+impl<T: ?Sized> Deref for ArcBorrow<'_, T> {
+ type Target = T;
+
+ fn deref(&self) -> &Self::Target {
+ // SAFETY: By the type invariant, the underlying object is still alive with no mutable
+ // references to it, so it is safe to create a shared reference.
+ unsafe { &self.inner.as_ref().data }
+ }
+}
+
+/// A refcounted object that is known to have a refcount of 1.
+///
+/// It is mutable and can be converted to an [`Arc`] so that it can be shared.
+///
+/// # Invariants
+///
+/// `inner` always has a reference count of 1.
+///
+/// # Examples
+///
+/// In the following example, we make changes to the inner object before turning it into an
+/// `Arc<Test>` object (after which point, it cannot be mutated directly). Note that `x.into()`
+/// cannot fail.
+///
+/// ```
+/// use kernel::sync::{Arc, UniqueArc};
+///
+/// struct Example {
+/// a: u32,
+/// b: u32,
+/// }
+///
+/// fn test() -> Result<Arc<Example>> {
+/// let mut x = UniqueArc::try_new(Example { a: 10, b: 20 })?;
+/// x.a += 1;
+/// x.b += 1;
+/// Ok(x.into())
+/// }
+///
+/// # test().unwrap();
+/// ```
+///
+/// In the following example we first allocate memory for a ref-counted `Example` but we don't
+/// initialise it on allocation. We do initialise it later with a call to [`UniqueArc::write`],
+/// followed by a conversion to `Arc<Example>`. This is particularly useful when allocation happens
+/// in one context (e.g., sleepable) and initialisation in another (e.g., atomic):
+///
+/// ```
+/// use kernel::sync::{Arc, UniqueArc};
+///
+/// struct Example {
+/// a: u32,
+/// b: u32,
+/// }
+///
+/// fn test() -> Result<Arc<Example>> {
+/// let x = UniqueArc::try_new_uninit()?;
+/// Ok(x.write(Example { a: 10, b: 20 }).into())
+/// }
+///
+/// # test().unwrap();
+/// ```
+///
+/// In the last example below, the caller gets a pinned instance of `Example` while converting to
+/// `Arc<Example>`; this is useful in scenarios where one needs a pinned reference during
+/// initialisation, for example, when initialising fields that are wrapped in locks.
+///
+/// ```
+/// use kernel::sync::{Arc, UniqueArc};
+///
+/// struct Example {
+/// a: u32,
+/// b: u32,
+/// }
+///
+/// fn test() -> Result<Arc<Example>> {
+/// let mut pinned = Pin::from(UniqueArc::try_new(Example { a: 10, b: 20 })?);
+/// // We can modify `pinned` because it is `Unpin`.
+/// pinned.as_mut().a += 1;
+/// Ok(pinned.into())
+/// }
+///
+/// # test().unwrap();
+/// ```
+pub struct UniqueArc<T: ?Sized> {
+ inner: Arc<T>,
+}
+
+impl<T> UniqueArc<T> {
+ /// Tries to allocate a new [`UniqueArc`] instance.
+ pub fn try_new(value: T) -> Result<Self> {
+ Ok(Self {
+ // INVARIANT: The newly-created object has a ref-count of 1.
+ inner: Arc::try_new(value)?,
+ })
+ }
+
+ /// Tries to allocate a new [`UniqueArc`] instance whose contents are not initialised yet.
+ pub fn try_new_uninit() -> Result<UniqueArc<MaybeUninit<T>>> {
+ Ok(UniqueArc::<MaybeUninit<T>> {
+ // INVARIANT: The newly-created object has a ref-count of 1.
+ inner: Arc::try_new(MaybeUninit::uninit())?,
+ })
+ }
+}
+
+impl<T> UniqueArc<MaybeUninit<T>> {
+ /// Converts a `UniqueArc<MaybeUninit<T>>` into a `UniqueArc<T>` by writing a value into it.
+ pub fn write(mut self, value: T) -> UniqueArc<T> {
+ self.deref_mut().write(value);
+ let inner = ManuallyDrop::new(self).inner.ptr;
+ UniqueArc {
+ // SAFETY: The new `Arc` is taking over `ptr` from `self.inner` (which won't be
+ // dropped). The types are compatible because `MaybeUninit<T>` is compatible with `T`.
+ inner: unsafe { Arc::from_inner(inner.cast()) },
+ }
+ }
+}
+
+impl<T: ?Sized> From<UniqueArc<T>> for Pin<UniqueArc<T>> {
+ fn from(obj: UniqueArc<T>) -> Self {
+ // SAFETY: It is not possible to move/replace `T` inside a `Pin<UniqueArc<T>>` (unless `T`
+ // is `Unpin`), so it is ok to convert it to `Pin<UniqueArc<T>>`.
+ unsafe { Pin::new_unchecked(obj) }
+ }
+}
+
+impl<T: ?Sized> Deref for UniqueArc<T> {
+ type Target = T;
+
+ fn deref(&self) -> &Self::Target {
+ self.inner.deref()
+ }
+}
+
+impl<T: ?Sized> DerefMut for UniqueArc<T> {
+ fn deref_mut(&mut self) -> &mut Self::Target {
+ // SAFETY: By the `Arc` type invariant, there is necessarily a reference to the object, so
+ // it is safe to dereference it. Additionally, we know there is only one reference when
+ // it's inside a `UniqueArc`, so it is safe to get a mutable reference.
+ unsafe { &mut self.inner.ptr.as_mut().data }
+ }
+}
diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs
index e84e51ec9716..9d0fdbc55843 100644
--- a/rust/kernel/types.rs
+++ b/rust/kernel/types.rs
@@ -2,7 +2,220 @@
//! Kernel types.
-use core::{cell::UnsafeCell, mem::MaybeUninit};
+use alloc::boxed::Box;
+use core::{
+ cell::UnsafeCell,
+ mem::MaybeUninit,
+ ops::{Deref, DerefMut},
+};
+
+/// Used to transfer ownership to and from foreign (non-Rust) languages.
+///
+/// Ownership is transferred from Rust to a foreign language by calling [`Self::into_foreign`] and
+/// later may be transferred back to Rust by calling [`Self::from_foreign`].
+///
+/// This trait is meant to be used in cases when Rust objects are stored in C objects and
+/// eventually "freed" back to Rust.
+pub trait ForeignOwnable: Sized {
+ /// Type of values borrowed between calls to [`ForeignOwnable::into_foreign`] and
+ /// [`ForeignOwnable::from_foreign`].
+ type Borrowed<'a>;
+
+ /// Converts a Rust-owned object to a foreign-owned one.
+ ///
+ /// The foreign representation is a pointer to void.
+ fn into_foreign(self) -> *const core::ffi::c_void;
+
+ /// Borrows a foreign-owned object.
+ ///
+ /// # Safety
+ ///
+ /// `ptr` must have been returned by a previous call to [`ForeignOwnable::into_foreign`] for
+ /// which a previous matching [`ForeignOwnable::from_foreign`] hasn't been called yet.
+ /// Additionally, all instances (if any) of values returned by [`ForeignOwnable::borrow_mut`]
+ /// for this object must have been dropped.
+ unsafe fn borrow<'a>(ptr: *const core::ffi::c_void) -> Self::Borrowed<'a>;
+
+ /// Mutably borrows a foreign-owned object.
+ ///
+ /// # Safety
+ ///
+ /// `ptr` must have been returned by a previous call to [`ForeignOwnable::into_foreign`] for
+ /// which a previous matching [`ForeignOwnable::from_foreign`] hasn't been called yet.
+ /// Additionally, all instances (if any) of values returned by [`ForeignOwnable::borrow`] and
+ /// [`ForeignOwnable::borrow_mut`] for this object must have been dropped.
+ unsafe fn borrow_mut(ptr: *const core::ffi::c_void) -> ScopeGuard<Self, fn(Self)> {
+ // SAFETY: The safety requirements ensure that `ptr` came from a previous call to
+ // `into_foreign`.
+ ScopeGuard::new_with_data(unsafe { Self::from_foreign(ptr) }, |d| {
+ d.into_foreign();
+ })
+ }
+
+ /// Converts a foreign-owned object back to a Rust-owned one.
+ ///
+ /// # Safety
+ ///
+ /// `ptr` must have been returned by a previous call to [`ForeignOwnable::into_foreign`] for
+ /// which a previous matching [`ForeignOwnable::from_foreign`] hasn't been called yet.
+ /// Additionally, all instances (if any) of values returned by [`ForeignOwnable::borrow`] and
+ /// [`ForeignOwnable::borrow_mut`] for this object must have been dropped.
+ unsafe fn from_foreign(ptr: *const core::ffi::c_void) -> Self;
+}
+
+impl<T: 'static> ForeignOwnable for Box<T> {
+ type Borrowed<'a> = &'a T;
+
+ fn into_foreign(self) -> *const core::ffi::c_void {
+ Box::into_raw(self) as _
+ }
+
+ unsafe fn borrow<'a>(ptr: *const core::ffi::c_void) -> &'a T {
+ // SAFETY: The safety requirements for this function ensure that the object is still alive,
+ // so it is safe to dereference the raw pointer.
+ // The safety requirements of `from_foreign` also ensure that the object remains alive for
+ // the lifetime of the returned value.
+ unsafe { &*ptr.cast() }
+ }
+
+ unsafe fn from_foreign(ptr: *const core::ffi::c_void) -> Self {
+ // SAFETY: The safety requirements of this function ensure that `ptr` comes from a previous
+ // call to `Self::into_foreign`.
+ unsafe { Box::from_raw(ptr as _) }
+ }
+}
+
+impl ForeignOwnable for () {
+ type Borrowed<'a> = ();
+
+ fn into_foreign(self) -> *const core::ffi::c_void {
+ core::ptr::NonNull::dangling().as_ptr()
+ }
+
+ unsafe fn borrow<'a>(_: *const core::ffi::c_void) -> Self::Borrowed<'a> {}
+
+ unsafe fn from_foreign(_: *const core::ffi::c_void) -> Self {}
+}
+
+/// Runs a cleanup function/closure when dropped.
+///
+/// The [`ScopeGuard::dismiss`] function prevents the cleanup function from running.
+///
+/// # Examples
+///
+/// In the example below, we have multiple exit paths and we want to log regardless of which one is
+/// taken:
+/// ```
+/// # use kernel::ScopeGuard;
+/// fn example1(arg: bool) {
+/// let _log = ScopeGuard::new(|| pr_info!("example1 completed\n"));
+///
+/// if arg {
+/// return;
+/// }
+///
+/// pr_info!("Do something...\n");
+/// }
+///
+/// # example1(false);
+/// # example1(true);
+/// ```
+///
+/// In the example below, we want to log the same message on all early exits but a different one on
+/// the main exit path:
+/// ```
+/// # use kernel::ScopeGuard;
+/// fn example2(arg: bool) {
+/// let log = ScopeGuard::new(|| pr_info!("example2 returned early\n"));
+///
+/// if arg {
+/// return;
+/// }
+///
+/// // (Other early returns...)
+///
+/// log.dismiss();
+/// pr_info!("example2 no early return\n");
+/// }
+///
+/// # example2(false);
+/// # example2(true);
+/// ```
+///
+/// In the example below, we need a mutable object (the vector) to be accessible within the log
+/// function, so we wrap it in the [`ScopeGuard`]:
+/// ```
+/// # use kernel::ScopeGuard;
+/// fn example3(arg: bool) -> Result {
+/// let mut vec =
+/// ScopeGuard::new_with_data(Vec::new(), |v| pr_info!("vec had {} elements\n", v.len()));
+///
+/// vec.try_push(10u8)?;
+/// if arg {
+/// return Ok(());
+/// }
+/// vec.try_push(20u8)?;
+/// Ok(())
+/// }
+///
+/// # assert_eq!(example3(false), Ok(()));
+/// # assert_eq!(example3(true), Ok(()));
+/// ```
+///
+/// # Invariants
+///
+/// The value stored in the struct is nearly always `Some(_)`, except between
+/// [`ScopeGuard::dismiss`] and [`ScopeGuard::drop`]: in this case, it will be `None` as the value
+/// will have been returned to the caller. Since [`ScopeGuard::dismiss`] consumes the guard,
+/// callers won't be able to use it anymore.
+pub struct ScopeGuard<T, F: FnOnce(T)>(Option<(T, F)>);
+
+impl<T, F: FnOnce(T)> ScopeGuard<T, F> {
+ /// Creates a new guarded object wrapping the given data and with the given cleanup function.
+ pub fn new_with_data(data: T, cleanup_func: F) -> Self {
+ // INVARIANT: The struct is being initialised with `Some(_)`.
+ Self(Some((data, cleanup_func)))
+ }
+
+ /// Prevents the cleanup function from running and returns the guarded data.
+ pub fn dismiss(mut self) -> T {
+ // INVARIANT: This is the exception case in the invariant; it is not visible to callers
+ // because this function consumes `self`.
+ self.0.take().unwrap().0
+ }
+}
+
+impl ScopeGuard<(), fn(())> {
+ /// Creates a new guarded object with the given cleanup function.
+ pub fn new(cleanup: impl FnOnce()) -> ScopeGuard<(), impl FnOnce(())> {
+ ScopeGuard::new_with_data((), move |_| cleanup())
+ }
+}
+
+impl<T, F: FnOnce(T)> Deref for ScopeGuard<T, F> {
+ type Target = T;
+
+ fn deref(&self) -> &T {
+ // The type invariants guarantee that `unwrap` will succeed.
+ &self.0.as_ref().unwrap().0
+ }
+}
+
+impl<T, F: FnOnce(T)> DerefMut for ScopeGuard<T, F> {
+ fn deref_mut(&mut self) -> &mut T {
+ // The type invariants guarantee that `unwrap` will succeed.
+ &mut self.0.as_mut().unwrap().0
+ }
+}
+
+impl<T, F: FnOnce(T)> Drop for ScopeGuard<T, F> {
+ fn drop(&mut self) {
+ // Run the cleanup function if one is still present.
+ if let Some((data, cleanup)) = self.0.take() {
+ cleanup(data)
+ }
+ }
+}
/// Stores an opaque value.
///
diff --git a/samples/ftrace/ftrace-direct-multi-modify.c b/samples/ftrace/ftrace-direct-multi-modify.c
index d52370cad0b6..a825dbd2c9cf 100644
--- a/samples/ftrace/ftrace-direct-multi-modify.c
+++ b/samples/ftrace/ftrace-direct-multi-modify.c
@@ -152,6 +152,7 @@ static void __exit ftrace_direct_multi_exit(void)
{
kthread_stop(simple_tsk);
unregister_ftrace_direct_multi(&direct, my_tramp);
+ ftrace_free_filter(&direct);
}
module_init(ftrace_direct_multi_init);
diff --git a/samples/ftrace/ftrace-direct-multi.c b/samples/ftrace/ftrace-direct-multi.c
index ec1088922517..d955a2650605 100644
--- a/samples/ftrace/ftrace-direct-multi.c
+++ b/samples/ftrace/ftrace-direct-multi.c
@@ -79,6 +79,7 @@ static int __init ftrace_direct_multi_init(void)
static void __exit ftrace_direct_multi_exit(void)
{
unregister_ftrace_direct_multi(&direct, (unsigned long) my_tramp);
+ ftrace_free_filter(&direct);
}
module_init(ftrace_direct_multi_init);
diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst
index 836391e5d209..4815a8e32227 100644
--- a/scripts/Makefile.modinst
+++ b/scripts/Makefile.modinst
@@ -66,9 +66,13 @@ endif
# Don't stop modules_install even if we can't sign external modules.
#
ifeq ($(CONFIG_MODULE_SIG_ALL),y)
+ifeq ($(filter pkcs11:%, $(CONFIG_MODULE_SIG_KEY)),)
sig-key := $(if $(wildcard $(CONFIG_MODULE_SIG_KEY)),,$(srctree)/)$(CONFIG_MODULE_SIG_KEY)
+else
+sig-key := $(CONFIG_MODULE_SIG_KEY)
+endif
quiet_cmd_sign = SIGN $@
- cmd_sign = scripts/sign-file $(CONFIG_MODULE_SIG_HASH) $(sig-key) certs/signing_key.x509 $@ \
+ cmd_sign = scripts/sign-file $(CONFIG_MODULE_SIG_HASH) "$(sig-key)" certs/signing_key.x509 $@ \
$(if $(KBUILD_EXTMOD),|| true)
else
quiet_cmd_sign :=
diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
index 0ee296cf520c..43343e13c542 100644
--- a/scripts/Makefile.modpost
+++ b/scripts/Makefile.modpost
@@ -44,6 +44,7 @@ modpost-args = \
$(if $(CONFIG_MODVERSIONS),-m) \
$(if $(CONFIG_MODULE_SRCVERSION_ALL),-a) \
$(if $(CONFIG_SECTION_MISMATCH_WARN_ONLY),,-E) \
+ $(if $(KBUILD_MODPOST_WARN),-w) \
$(if $(KBUILD_NSDEPS),-d $(MODULES_NSDEPS)) \
$(if $(CONFIG_MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS)$(KBUILD_NSDEPS),-N) \
-o $@
@@ -55,10 +56,6 @@ ifneq ($(findstring i,$(filter-out --%,$(MAKEFLAGS))),)
modpost-args += -n
endif
-ifneq ($(KBUILD_MODPOST_WARN)$(missing-input),)
-modpost-args += -w
-endif
-
# Read out modules.order to pass in modpost.
# Otherwise, allmodconfig would fail with "Argument list too long".
ifdef KBUILD_MODULES
@@ -124,6 +121,10 @@ modpost-args += -e $(addprefix -i , $(KBUILD_EXTRA_SYMBOLS))
endif # ($(KBUILD_EXTMOD),)
+ifneq ($(missing-input),)
+modpost-args += -w
+endif
+
quiet_cmd_modpost = MODPOST $@
cmd_modpost = \
$(if $(missing-input), \
diff --git a/scripts/Makefile.vmlinux b/scripts/Makefile.vmlinux
index 49946cb96844..10176dec97ea 100644
--- a/scripts/Makefile.vmlinux
+++ b/scripts/Makefile.vmlinux
@@ -18,6 +18,7 @@ quiet_cmd_cc_o_c = CC $@
$(call if_changed_dep,cc_o_c)
ifdef CONFIG_MODULES
+KASAN_SANITIZE_.vmlinux.export.o := n
targets += .vmlinux.export.o
vmlinux: .vmlinux.export.o
endif
diff --git a/scripts/atomic/atomics.tbl b/scripts/atomic/atomics.tbl
index fbee2f6190d9..fbee2f6190d9 100755..100644
--- a/scripts/atomic/atomics.tbl
+++ b/scripts/atomic/atomics.tbl
diff --git a/scripts/gcc-plugins/gcc-common.h b/scripts/gcc-plugins/gcc-common.h
index 9a1895747b15..84c730da36dd 100644
--- a/scripts/gcc-plugins/gcc-common.h
+++ b/scripts/gcc-plugins/gcc-common.h
@@ -71,7 +71,9 @@
#include "varasm.h"
#include "stor-layout.h"
#include "internal-fn.h"
+#include "gimple.h"
#include "gimple-expr.h"
+#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "context.h"
#include "tree-ssa-alias.h"
@@ -85,10 +87,8 @@
#include "tree-eh.h"
#include "stmt.h"
#include "gimplify.h"
-#include "gimple.h"
#include "tree-phinodes.h"
#include "tree-cfg.h"
-#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "ssa-iterators.h"
diff --git a/scripts/gdb/linux/cpus.py b/scripts/gdb/linux/cpus.py
index 15fc4626d236..9ee99f9fae8d 100644
--- a/scripts/gdb/linux/cpus.py
+++ b/scripts/gdb/linux/cpus.py
@@ -163,7 +163,7 @@ def get_current_task(cpu):
task_ptr_type = task_type.get_type().pointer()
if utils.is_target_arch("x86"):
- var_ptr = gdb.parse_and_eval("&current_task")
+ var_ptr = gdb.parse_and_eval("&pcpu_hot.current_task")
return per_cpu(var_ptr, cpu).dereference()
elif utils.is_target_arch("aarch64"):
current_task_addr = gdb.parse_and_eval("$SP_EL0")
diff --git a/scripts/jobserver-exec b/scripts/jobserver-exec
index 4192855f5b8b..7eca035472d3 100755
--- a/scripts/jobserver-exec
+++ b/scripts/jobserver-exec
@@ -26,11 +26,20 @@ try:
# If the MAKEFLAGS variable contains multiple instances of the
# --jobserver-auth= option, the last one is relevant.
fds = opts[-1].split("=", 1)[1]
- reader, writer = [int(x) for x in fds.split(",", 1)]
- # Open a private copy of reader to avoid setting nonblocking
- # on an unexpecting process with the same reader fd.
- reader = os.open("/proc/self/fd/%d" % (reader),
- os.O_RDONLY | os.O_NONBLOCK)
+
+ # Starting with GNU Make 4.4, named pipes are used for reader and writer.
+ # Example argument: --jobserver-auth=fifo:/tmp/GMfifo8134
+ _, _, path = fds.partition('fifo:')
+
+ if path:
+ reader = os.open(path, os.O_RDONLY | os.O_NONBLOCK)
+ writer = os.open(path, os.O_WRONLY)
+ else:
+ reader, writer = [int(x) for x in fds.split(",", 1)]
+ # Open a private copy of reader to avoid setting nonblocking
+ # on an unexpecting process with the same reader fd.
+ reader = os.open("/proc/self/fd/%d" % (reader),
+ os.O_RDONLY | os.O_NONBLOCK)
# Read out as many jobserver slots as possible.
while True:
diff --git a/scripts/kconfig/.gitignore b/scripts/kconfig/.gitignore
index c8a3f9cd52f0..0b2ff775b2e3 100644
--- a/scripts/kconfig/.gitignore
+++ b/scripts/kconfig/.gitignore
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
/conf
/[gmnq]conf
+/[gmnq]conf-bin
/[gmnq]conf-cflags
/[gmnq]conf-libs
-/qconf-bin
/qconf-moc.cc
diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile
index 0b1d15efaeb0..af1c96198f49 100644
--- a/scripts/kconfig/Makefile
+++ b/scripts/kconfig/Makefile
@@ -209,7 +209,7 @@ $(obj)/gconf: | $(obj)/gconf-libs
$(obj)/gconf.o: | $(obj)/gconf-cflags
# check if necessary packages are available, and configure build flags
-cmd_conf_cfg = $< $(addprefix $(obj)/$*conf-, cflags libs bin)
+cmd_conf_cfg = $< $(addprefix $(obj)/$*conf-, cflags libs bin); touch $(obj)/$*conf-bin
$(obj)/%conf-cflags $(obj)/%conf-libs $(obj)/%conf-bin: $(src)/%conf-cfg.sh
$(call cmd,conf_cfg)
diff --git a/scripts/package/mkspec b/scripts/package/mkspec
index adab28fa7f89..094e52c979a8 100755
--- a/scripts/package/mkspec
+++ b/scripts/package/mkspec
@@ -1,7 +1,7 @@
#!/bin/sh
#
# Output a simple RPM spec file.
-# This version assumes a minimum of RPM 4.0.3.
+# This version assumes a minimum of RPM 4.13
#
# The only gothic bit here is redefining install_post to avoid
# stripping the symbols from files in the kernel which we want
diff --git a/scripts/tracing/ftrace-bisect.sh b/scripts/tracing/ftrace-bisect.sh
index 926701162bc8..bb4f59262bbe 100755
--- a/scripts/tracing/ftrace-bisect.sh
+++ b/scripts/tracing/ftrace-bisect.sh
@@ -12,7 +12,7 @@
# (note, if this is a problem with function_graph tracing, then simply
# replace "function" with "function_graph" in the following steps).
#
-# # cd /sys/kernel/debug/tracing
+# # cd /sys/kernel/tracing
# # echo schedule > set_ftrace_filter
# # echo function > current_tracer
#
@@ -20,22 +20,40 @@
#
# # echo nop > current_tracer
#
-# # cat available_filter_functions > ~/full-file
+# Starting with v5.1 this can be done with numbers, making it much faster:
+#
+# The old (slow) way, for kernels before v5.1.
+#
+# [old-way] # cat available_filter_functions > ~/full-file
+#
+# [old-way] *** Note *** this process will take several minutes to update the
+# [old-way] filters. Setting multiple functions is an O(n^2) operation, and we
+# [old-way] are dealing with thousands of functions. So go have coffee, talk
+# [old-way] with your coworkers, read facebook. And eventually, this operation
+# [old-way] will end.
+#
+# The new way (using numbers) is an O(n) operation, and usually takes less than a second.
+#
+# seq `wc -l available_filter_functions | cut -d' ' -f1` > ~/full-file
+#
+# This will create a sequence of numbers that match the functions in
+# available_filter_functions, and when echoing in a number into the
+# set_ftrace_filter file, it will enable the corresponding function in
+# O(1) time. Making enabling all functions O(n) where n is the number of
+# functions to enable.
+#
+# For either the new or old way, the rest of the operations remain the same.
+#
# # ftrace-bisect ~/full-file ~/test-file ~/non-test-file
# # cat ~/test-file > set_ftrace_filter
#
-# *** Note *** this will take several minutes. Setting multiple functions is
-# an O(n^2) operation, and we are dealing with thousands of functions. So go
-# have coffee, talk with your coworkers, read facebook. And eventually, this
-# operation will end.
-#
# # echo function > current_tracer
#
# If it crashes, we know that ~/test-file has a bad function.
#
# Reboot back to test kernel.
#
-# # cd /sys/kernel/debug/tracing
+# # cd /sys/kernel/tracing
# # mv ~/test-file ~/full-file
#
# If it didn't crash.
diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 424b2c1e586d..db7a51acf9db 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -1793,7 +1793,7 @@ fail2:
return error;
}
-static int ns_mkdir_op(struct user_namespace *mnt_userns, struct inode *dir,
+static int ns_mkdir_op(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
struct aa_ns *ns, *parent;
diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index 6dd3cc5309bf..f3715cda59c5 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -313,7 +313,7 @@ static int aa_xattrs_match(const struct linux_binprm *bprm,
d = bprm->file->f_path.dentry;
for (i = 0; i < attach->xattr_count; i++) {
- size = vfs_getxattr_alloc(&init_user_ns, d, attach->xattrs[i],
+ size = vfs_getxattr_alloc(&nop_mnt_idmap, d, attach->xattrs[i],
&value, value_size, GFP_KERNEL);
if (size >= 0) {
u32 index, perm;
@@ -862,7 +862,7 @@ int apparmor_bprm_creds_for_exec(struct linux_binprm *bprm)
const char *info = NULL;
int error = 0;
bool unsafe = false;
- vfsuid_t vfsuid = i_uid_into_vfsuid(file_mnt_user_ns(bprm->file),
+ vfsuid_t vfsuid = i_uid_into_vfsuid(file_mnt_idmap(bprm->file),
file_inode(bprm->file));
struct path_cond cond = {
vfsuid_into_kuid(vfsuid),
diff --git a/security/apparmor/file.c b/security/apparmor/file.c
index cb3d3060d104..9119ddda6217 100644
--- a/security/apparmor/file.c
+++ b/security/apparmor/file.c
@@ -459,7 +459,7 @@ static int __file_path_perm(const char *op, struct aa_label *label,
{
struct aa_profile *profile;
struct aa_perms perms = {};
- vfsuid_t vfsuid = i_uid_into_vfsuid(file_mnt_user_ns(file),
+ vfsuid_t vfsuid = i_uid_into_vfsuid(file_mnt_idmap(file),
file_inode(file));
struct path_cond cond = {
.uid = vfsuid_into_kuid(vfsuid),
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index c6728a629437..d6cc4812ca53 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -227,8 +227,7 @@ static int common_perm(const char *op, const struct path *path, u32 mask,
*/
static int common_perm_cond(const char *op, const struct path *path, u32 mask)
{
- struct user_namespace *mnt_userns = mnt_user_ns(path->mnt);
- vfsuid_t vfsuid = i_uid_into_vfsuid(mnt_userns,
+ vfsuid_t vfsuid = i_uid_into_vfsuid(mnt_idmap(path->mnt),
d_backing_inode(path->dentry));
struct path_cond cond = {
vfsuid_into_kuid(vfsuid),
@@ -273,14 +272,13 @@ static int common_perm_rm(const char *op, const struct path *dir,
struct dentry *dentry, u32 mask)
{
struct inode *inode = d_backing_inode(dentry);
- struct user_namespace *mnt_userns = mnt_user_ns(dir->mnt);
struct path_cond cond = { };
vfsuid_t vfsuid;
if (!inode || !path_mediated_fs(dentry))
return 0;
- vfsuid = i_uid_into_vfsuid(mnt_userns, inode);
+ vfsuid = i_uid_into_vfsuid(mnt_idmap(dir->mnt), inode);
cond.uid = vfsuid_into_kuid(vfsuid);
cond.mode = inode->i_mode;
@@ -379,7 +377,7 @@ static int apparmor_path_rename(const struct path *old_dir, struct dentry *old_d
label = begin_current_label_crit_section();
if (!unconfined(label)) {
- struct user_namespace *mnt_userns = mnt_user_ns(old_dir->mnt);
+ struct mnt_idmap *idmap = mnt_idmap(old_dir->mnt);
vfsuid_t vfsuid;
struct path old_path = { .mnt = old_dir->mnt,
.dentry = old_dentry };
@@ -388,14 +386,14 @@ static int apparmor_path_rename(const struct path *old_dir, struct dentry *old_d
struct path_cond cond = {
.mode = d_backing_inode(old_dentry)->i_mode
};
- vfsuid = i_uid_into_vfsuid(mnt_userns, d_backing_inode(old_dentry));
+ vfsuid = i_uid_into_vfsuid(idmap, d_backing_inode(old_dentry));
cond.uid = vfsuid_into_kuid(vfsuid);
if (flags & RENAME_EXCHANGE) {
struct path_cond cond_exchange = {
.mode = d_backing_inode(new_dentry)->i_mode,
};
- vfsuid = i_uid_into_vfsuid(mnt_userns, d_backing_inode(old_dentry));
+ vfsuid = i_uid_into_vfsuid(idmap, d_backing_inode(old_dentry));
cond_exchange.uid = vfsuid_into_kuid(vfsuid);
error = aa_path_perm(OP_RENAME_SRC, label, &new_path, 0,
@@ -460,13 +458,13 @@ static int apparmor_file_open(struct file *file)
label = aa_get_newest_cred_label(file->f_cred);
if (!unconfined(label)) {
- struct user_namespace *mnt_userns = file_mnt_user_ns(file);
+ struct mnt_idmap *idmap = file_mnt_idmap(file);
struct inode *inode = file_inode(file);
vfsuid_t vfsuid;
struct path_cond cond = {
.mode = inode->i_mode,
};
- vfsuid = i_uid_into_vfsuid(mnt_userns, inode);
+ vfsuid = i_uid_into_vfsuid(idmap, inode);
cond.uid = vfsuid_into_kuid(vfsuid);
error = aa_path_perm(OP_OPEN, label, &file->f_path, 0,
diff --git a/security/apparmor/policy_compat.c b/security/apparmor/policy_compat.c
index 9e52e218bf30..cc89d1e88fb7 100644
--- a/security/apparmor/policy_compat.c
+++ b/security/apparmor/policy_compat.c
@@ -160,8 +160,7 @@ static struct aa_perms *compute_fperms(struct aa_dfa *dfa)
if (!table)
return NULL;
- /* zero init so skip the trap state (state == 0) */
- for (state = 1; state < state_count; state++) {
+ for (state = 0; state < state_count; state++) {
table[state * 2] = compute_fperms_user(dfa, state);
table[state * 2 + 1] = compute_fperms_other(dfa, state);
}
diff --git a/security/commoncap.c b/security/commoncap.c
index 1164278b97fd..aec62db55271 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -305,24 +305,24 @@ int cap_inode_need_killpriv(struct dentry *dentry)
/**
* cap_inode_killpriv - Erase the security markings on an inode
*
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @dentry: The inode/dentry to alter
*
* Erase the privilege-enhancing security markings on an inode.
*
- * If the inode has been found through an idmapped mount the user namespace of
- * the vfsmount must be passed through @mnt_userns. This function will then
- * take care to map the inode according to @mnt_userns before checking
+ * If the inode has been found through an idmapped mount the idmap of
+ * the vfsmount must be passed through @idmap. This function will then
+ * take care to map the inode according to @idmap before checking
* permissions. On non-idmapped mounts or if permission checking is to be
- * performed on the raw inode simply passs init_user_ns.
+ * performed on the raw inode simply passs @nop_mnt_idmap.
*
* Return: 0 if successful, -ve on error.
*/
-int cap_inode_killpriv(struct user_namespace *mnt_userns, struct dentry *dentry)
+int cap_inode_killpriv(struct mnt_idmap *idmap, struct dentry *dentry)
{
int error;
- error = __vfs_removexattr(mnt_userns, dentry, XATTR_NAME_CAPS);
+ error = __vfs_removexattr(idmap, dentry, XATTR_NAME_CAPS);
if (error == -EOPNOTSUPP)
error = 0;
return error;
@@ -377,7 +377,7 @@ static bool is_v3header(int size, const struct vfs_cap_data *cap)
* by the integrity subsystem, which really wants the unconverted values -
* so that's good.
*/
-int cap_inode_getsecurity(struct user_namespace *mnt_userns,
+int cap_inode_getsecurity(struct mnt_idmap *idmap,
struct inode *inode, const char *name, void **buffer,
bool alloc)
{
@@ -398,7 +398,7 @@ int cap_inode_getsecurity(struct user_namespace *mnt_userns,
dentry = d_find_any_alias(inode);
if (!dentry)
return -EINVAL;
- size = vfs_getxattr_alloc(mnt_userns, dentry, XATTR_NAME_CAPS, &tmpbuf,
+ size = vfs_getxattr_alloc(idmap, dentry, XATTR_NAME_CAPS, &tmpbuf,
sizeof(struct vfs_ns_cap_data), GFP_NOFS);
dput(dentry);
/* gcc11 complains if we don't check for !tmpbuf */
@@ -420,7 +420,7 @@ int cap_inode_getsecurity(struct user_namespace *mnt_userns,
kroot = make_kuid(fs_ns, root);
/* If this is an idmapped mount shift the kuid. */
- vfsroot = make_vfsuid(mnt_userns, fs_ns, kroot);
+ vfsroot = make_vfsuid(idmap, fs_ns, kroot);
/* If the root kuid maps to a valid uid in current ns, then return
* this as a nscap. */
@@ -510,7 +510,7 @@ static bool validheader(size_t size, const struct vfs_cap_data *cap)
/**
* cap_convert_nscap - check vfs caps
*
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @dentry: used to retrieve inode to check permissions on
* @ivalue: vfs caps value which may be modified by this function
* @size: size of @ivalue
@@ -518,15 +518,15 @@ static bool validheader(size_t size, const struct vfs_cap_data *cap)
* User requested a write of security.capability. If needed, update the
* xattr to change from v2 to v3, or to fixup the v3 rootid.
*
- * If the inode has been found through an idmapped mount the user namespace of
- * the vfsmount must be passed through @mnt_userns. This function will then
- * take care to map the inode according to @mnt_userns before checking
+ * If the inode has been found through an idmapped mount the idmap of
+ * the vfsmount must be passed through @idmap. This function will then
+ * take care to map the inode according to @idmap before checking
* permissions. On non-idmapped mounts or if permission checking is to be
- * performed on the raw inode simply passs init_user_ns.
+ * performed on the raw inode simply passs @nop_mnt_idmap.
*
* Return: On success, return the new size; on error, return < 0.
*/
-int cap_convert_nscap(struct user_namespace *mnt_userns, struct dentry *dentry,
+int cap_convert_nscap(struct mnt_idmap *idmap, struct dentry *dentry,
const void **ivalue, size_t size)
{
struct vfs_ns_cap_data *nscap;
@@ -544,9 +544,9 @@ int cap_convert_nscap(struct user_namespace *mnt_userns, struct dentry *dentry,
return -EINVAL;
if (!validheader(size, cap))
return -EINVAL;
- if (!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_SETFCAP))
+ if (!capable_wrt_inode_uidgid(idmap, inode, CAP_SETFCAP))
return -EPERM;
- if (size == XATTR_CAPS_SZ_2 && (mnt_userns == fs_ns))
+ if (size == XATTR_CAPS_SZ_2 && (idmap == &nop_mnt_idmap))
if (ns_capable(inode->i_sb->s_user_ns, CAP_SETFCAP))
/* user is privileged, just write the v2 */
return size;
@@ -555,7 +555,7 @@ int cap_convert_nscap(struct user_namespace *mnt_userns, struct dentry *dentry,
if (!vfsuid_valid(vfsrootid))
return -EINVAL;
- rootid = from_vfsuid(mnt_userns, fs_ns, vfsrootid);
+ rootid = from_vfsuid(idmap, fs_ns, vfsrootid);
if (!uid_valid(rootid))
return -EINVAL;
@@ -626,19 +626,19 @@ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps,
/**
* get_vfs_caps_from_disk - retrieve vfs caps from disk
*
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @dentry: dentry from which @inode is retrieved
* @cpu_caps: vfs capabilities
*
* Extract the on-exec-apply capability sets for an executable file.
*
- * If the inode has been found through an idmapped mount the user namespace of
- * the vfsmount must be passed through @mnt_userns. This function will then
- * take care to map the inode according to @mnt_userns before checking
+ * If the inode has been found through an idmapped mount the idmap of
+ * the vfsmount must be passed through @idmap. This function will then
+ * take care to map the inode according to @idmap before checking
* permissions. On non-idmapped mounts or if permission checking is to be
- * performed on the raw inode simply passs init_user_ns.
+ * performed on the raw inode simply passs @nop_mnt_idmap.
*/
-int get_vfs_caps_from_disk(struct user_namespace *mnt_userns,
+int get_vfs_caps_from_disk(struct mnt_idmap *idmap,
const struct dentry *dentry,
struct cpu_vfs_cap_data *cpu_caps)
{
@@ -695,7 +695,7 @@ int get_vfs_caps_from_disk(struct user_namespace *mnt_userns,
return -EINVAL;
}
- rootvfsuid = make_vfsuid(mnt_userns, fs_ns, rootkuid);
+ rootvfsuid = make_vfsuid(idmap, fs_ns, rootkuid);
if (!vfsuid_valid(rootvfsuid))
return -ENODATA;
@@ -747,7 +747,7 @@ static int get_file_caps(struct linux_binprm *bprm, struct file *file,
if (!current_in_userns(file->f_path.mnt->mnt_sb->s_user_ns))
return 0;
- rc = get_vfs_caps_from_disk(file_mnt_user_ns(file),
+ rc = get_vfs_caps_from_disk(file_mnt_idmap(file),
file->f_path.dentry, &vcaps);
if (rc < 0) {
if (rc == -EINVAL)
@@ -1016,23 +1016,23 @@ int cap_inode_setxattr(struct dentry *dentry, const char *name,
/**
* cap_inode_removexattr - Determine whether an xattr may be removed
*
- * @mnt_userns: User namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @dentry: The inode/dentry being altered
* @name: The name of the xattr to be changed
*
* Determine whether an xattr may be removed from an inode, returning 0 if
* permission is granted, -ve if denied.
*
- * If the inode has been found through an idmapped mount the user namespace of
- * the vfsmount must be passed through @mnt_userns. This function will then
- * take care to map the inode according to @mnt_userns before checking
+ * If the inode has been found through an idmapped mount the idmap of
+ * the vfsmount must be passed through @idmap. This function will then
+ * take care to map the inode according to @idmap before checking
* permissions. On non-idmapped mounts or if permission checking is to be
- * performed on the raw inode simply passs init_user_ns.
+ * performed on the raw inode simply pass @nop_mnt_idmap.
*
* This is used to make sure security xattrs don't get removed by those who
* aren't privileged to remove them.
*/
-int cap_inode_removexattr(struct user_namespace *mnt_userns,
+int cap_inode_removexattr(struct mnt_idmap *idmap,
struct dentry *dentry, const char *name)
{
struct user_namespace *user_ns = dentry->d_sb->s_user_ns;
@@ -1047,7 +1047,7 @@ int cap_inode_removexattr(struct user_namespace *mnt_userns,
struct inode *inode = d_backing_inode(dentry);
if (!inode)
return -EINVAL;
- if (!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_SETFCAP))
+ if (!capable_wrt_inode_uidgid(idmap, inode, CAP_SETFCAP))
return -EPERM;
return 0;
}
diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c
index fa5ff13fa8c9..52b811da6989 100644
--- a/security/integrity/evm/evm_crypto.c
+++ b/security/integrity/evm/evm_crypto.c
@@ -265,7 +265,7 @@ static int evm_calc_hmac_or_hash(struct dentry *dentry,
req_xattr_value_len);
continue;
}
- size = vfs_getxattr_alloc(&init_user_ns, dentry, xattr->name,
+ size = vfs_getxattr_alloc(&nop_mnt_idmap, dentry, xattr->name,
&xattr_value, xattr_size, GFP_NOFS);
if (size == -ENOMEM) {
error = -ENOMEM;
@@ -274,7 +274,7 @@ static int evm_calc_hmac_or_hash(struct dentry *dentry,
if (size < 0)
continue;
- user_space_size = vfs_getxattr(&init_user_ns, dentry,
+ user_space_size = vfs_getxattr(&nop_mnt_idmap, dentry,
xattr->name, NULL, 0);
if (user_space_size != size)
pr_debug("file %s: xattr %s size mismatch (kernel: %d, user: %d)\n",
@@ -331,7 +331,7 @@ static int evm_is_immutable(struct dentry *dentry, struct inode *inode)
return 1;
/* Do this the hard way */
- rc = vfs_getxattr_alloc(&init_user_ns, dentry, XATTR_NAME_EVM,
+ rc = vfs_getxattr_alloc(&nop_mnt_idmap, dentry, XATTR_NAME_EVM,
(char **)&xattr_data, 0, GFP_NOFS);
if (rc <= 0) {
if (rc == -ENODATA)
@@ -376,12 +376,12 @@ int evm_update_evmxattr(struct dentry *dentry, const char *xattr_name,
xattr_value_len, &data);
if (rc == 0) {
data.hdr.xattr.sha1.type = EVM_XATTR_HMAC;
- rc = __vfs_setxattr_noperm(&init_user_ns, dentry,
+ rc = __vfs_setxattr_noperm(&nop_mnt_idmap, dentry,
XATTR_NAME_EVM,
&data.hdr.xattr.data[1],
SHA1_DIGEST_SIZE + 1, 0);
} else if (rc == -ENODATA && (inode->i_opflags & IOP_XATTR)) {
- rc = __vfs_removexattr(&init_user_ns, dentry, XATTR_NAME_EVM);
+ rc = __vfs_removexattr(&nop_mnt_idmap, dentry, XATTR_NAME_EVM);
}
return rc;
}
diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
index f02e609460e2..cf24c5255583 100644
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c
@@ -184,7 +184,7 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry,
/* if status is not PASS, try to check again - against -ENOMEM */
/* first need to know the sig type */
- rc = vfs_getxattr_alloc(&init_user_ns, dentry, XATTR_NAME_EVM,
+ rc = vfs_getxattr_alloc(&nop_mnt_idmap, dentry, XATTR_NAME_EVM,
(char **)&xattr_data, 0, GFP_NOFS);
if (rc <= 0) {
evm_status = INTEGRITY_FAIL;
@@ -436,7 +436,7 @@ static enum integrity_status evm_verify_current_integrity(struct dentry *dentry)
/*
* evm_xattr_change - check if passed xattr value differs from current value
- * @mnt_userns: user namespace of the idmapped mount
+ * @idmap: idmap of the mount
* @dentry: pointer to the affected dentry
* @xattr_name: requested xattr
* @xattr_value: requested xattr value
@@ -446,14 +446,14 @@ static enum integrity_status evm_verify_current_integrity(struct dentry *dentry)
*
* Returns 1 if passed xattr value differs from current value, 0 otherwise.
*/
-static int evm_xattr_change(struct user_namespace *mnt_userns,
+static int evm_xattr_change(struct mnt_idmap *idmap,
struct dentry *dentry, const char *xattr_name,
const void *xattr_value, size_t xattr_value_len)
{
char *xattr_data = NULL;
int rc = 0;
- rc = vfs_getxattr_alloc(&init_user_ns, dentry, xattr_name, &xattr_data,
+ rc = vfs_getxattr_alloc(&nop_mnt_idmap, dentry, xattr_name, &xattr_data,
0, GFP_NOFS);
if (rc < 0) {
rc = 1;
@@ -482,7 +482,7 @@ out:
* For posix xattr acls only, permit security.evm, even if it currently
* doesn't exist, to be updated unless the EVM signature is immutable.
*/
-static int evm_protect_xattr(struct user_namespace *mnt_userns,
+static int evm_protect_xattr(struct mnt_idmap *idmap,
struct dentry *dentry, const char *xattr_name,
const void *xattr_value, size_t xattr_value_len)
{
@@ -538,7 +538,7 @@ out:
return 0;
if (evm_status == INTEGRITY_PASS_IMMUTABLE &&
- !evm_xattr_change(mnt_userns, dentry, xattr_name, xattr_value,
+ !evm_xattr_change(idmap, dentry, xattr_name, xattr_value,
xattr_value_len))
return 0;
@@ -553,7 +553,7 @@ out:
/**
* evm_inode_setxattr - protect the EVM extended attribute
- * @mnt_userns: user namespace of the idmapped mount
+ * @idmap: idmap of the mount
* @dentry: pointer to the affected dentry
* @xattr_name: pointer to the affected extended attribute name
* @xattr_value: pointer to the new extended attribute value
@@ -565,7 +565,7 @@ out:
* userspace from writing HMAC value. Writing 'security.evm' requires
* requires CAP_SYS_ADMIN privileges.
*/
-int evm_inode_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int evm_inode_setxattr(struct mnt_idmap *idmap, struct dentry *dentry,
const char *xattr_name, const void *xattr_value,
size_t xattr_value_len)
{
@@ -584,20 +584,20 @@ int evm_inode_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
xattr_data->type != EVM_XATTR_PORTABLE_DIGSIG)
return -EPERM;
}
- return evm_protect_xattr(mnt_userns, dentry, xattr_name, xattr_value,
+ return evm_protect_xattr(idmap, dentry, xattr_name, xattr_value,
xattr_value_len);
}
/**
* evm_inode_removexattr - protect the EVM extended attribute
- * @mnt_userns: user namespace of the idmapped mount
+ * @idmap: idmap of the mount
* @dentry: pointer to the affected dentry
* @xattr_name: pointer to the affected extended attribute name
*
* Removing 'security.evm' requires CAP_SYS_ADMIN privileges and that
* the current value is valid.
*/
-int evm_inode_removexattr(struct user_namespace *mnt_userns,
+int evm_inode_removexattr(struct mnt_idmap *idmap,
struct dentry *dentry, const char *xattr_name)
{
/* Policy permits modification of the protected xattrs even though
@@ -606,11 +606,11 @@ int evm_inode_removexattr(struct user_namespace *mnt_userns,
if (evm_initialized & EVM_ALLOW_METADATA_WRITES)
return 0;
- return evm_protect_xattr(mnt_userns, dentry, xattr_name, NULL, 0);
+ return evm_protect_xattr(idmap, dentry, xattr_name, NULL, 0);
}
#ifdef CONFIG_FS_POSIX_ACL
-static int evm_inode_set_acl_change(struct user_namespace *mnt_userns,
+static int evm_inode_set_acl_change(struct mnt_idmap *idmap,
struct dentry *dentry, const char *name,
struct posix_acl *kacl)
{
@@ -622,14 +622,14 @@ static int evm_inode_set_acl_change(struct user_namespace *mnt_userns,
if (!kacl)
return 1;
- rc = posix_acl_update_mode(mnt_userns, inode, &mode, &kacl);
+ rc = posix_acl_update_mode(idmap, inode, &mode, &kacl);
if (rc || (inode->i_mode != mode))
return 1;
return 0;
}
#else
-static inline int evm_inode_set_acl_change(struct user_namespace *mnt_userns,
+static inline int evm_inode_set_acl_change(struct mnt_idmap *idmap,
struct dentry *dentry,
const char *name,
struct posix_acl *kacl)
@@ -640,7 +640,7 @@ static inline int evm_inode_set_acl_change(struct user_namespace *mnt_userns,
/**
* evm_inode_set_acl - protect the EVM extended attribute from posix acls
- * @mnt_userns: user namespace of the idmapped mount
+ * @idmap: idmap of the idmapped mount
* @dentry: pointer to the affected dentry
* @acl_name: name of the posix acl
* @kacl: pointer to the posix acls
@@ -649,7 +649,7 @@ static inline int evm_inode_set_acl_change(struct user_namespace *mnt_userns,
* and 'security.evm' xattr updated, unless the existing 'security.evm' is
* valid.
*/
-int evm_inode_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+int evm_inode_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
const char *acl_name, struct posix_acl *kacl)
{
enum integrity_status evm_status;
@@ -678,7 +678,7 @@ int evm_inode_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
return 0;
if (evm_status == INTEGRITY_PASS_IMMUTABLE &&
- !evm_inode_set_acl_change(mnt_userns, dentry, acl_name, kacl))
+ !evm_inode_set_acl_change(idmap, dentry, acl_name, kacl))
return 0;
if (evm_status != INTEGRITY_PASS_IMMUTABLE)
@@ -779,14 +779,14 @@ void evm_inode_post_removexattr(struct dentry *dentry, const char *xattr_name)
evm_update_evmxattr(dentry, xattr_name, NULL, 0);
}
-static int evm_attr_change(struct user_namespace *mnt_userns,
+static int evm_attr_change(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_backing_inode(dentry);
unsigned int ia_valid = attr->ia_valid;
- if (!i_uid_needs_update(mnt_userns, attr, inode) &&
- !i_gid_needs_update(mnt_userns, attr, inode) &&
+ if (!i_uid_needs_update(idmap, attr, inode) &&
+ !i_gid_needs_update(idmap, attr, inode) &&
(!(ia_valid & ATTR_MODE) || attr->ia_mode == inode->i_mode))
return 0;
@@ -800,7 +800,7 @@ static int evm_attr_change(struct user_namespace *mnt_userns,
* Permit update of file attributes when files have a valid EVM signature,
* except in the case of them having an immutable portable signature.
*/
-int evm_inode_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+int evm_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
unsigned int ia_valid = attr->ia_valid;
@@ -827,7 +827,7 @@ int evm_inode_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
return 0;
if (evm_status == INTEGRITY_PASS_IMMUTABLE &&
- !evm_attr_change(mnt_userns, dentry, attr))
+ !evm_attr_change(idmap, dentry, attr))
return 0;
integrity_audit_msg(AUDIT_INTEGRITY_METADATA, d_backing_inode(dentry),
diff --git a/security/integrity/evm/evm_secfs.c b/security/integrity/evm/evm_secfs.c
index 8a9db7dfca7e..9b907c2fee60 100644
--- a/security/integrity/evm/evm_secfs.c
+++ b/security/integrity/evm/evm_secfs.c
@@ -228,7 +228,7 @@ static ssize_t evm_write_xattrs(struct file *file, const char __user *buf,
newattrs.ia_valid = ATTR_MODE;
inode = evm_xattrs->d_inode;
inode_lock(inode);
- err = simple_setattr(&init_user_ns, evm_xattrs, &newattrs);
+ err = simple_setattr(&nop_mnt_idmap, evm_xattrs, &newattrs);
inode_unlock(inode);
if (!err)
err = count;
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index 03b440921e61..d8530e722515 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -254,7 +254,7 @@ static inline void ima_process_queued_keys(void) {}
#endif /* CONFIG_IMA_QUEUE_EARLY_BOOT_KEYS */
/* LIM API function definitions */
-int ima_get_action(struct user_namespace *mnt_userns, struct inode *inode,
+int ima_get_action(struct mnt_idmap *idmap, struct inode *inode,
const struct cred *cred, u32 secid, int mask,
enum ima_hooks func, int *pcr,
struct ima_template_desc **template_desc,
@@ -268,7 +268,7 @@ void ima_store_measurement(struct integrity_iint_cache *iint, struct file *file,
struct evm_ima_xattr_data *xattr_value,
int xattr_len, const struct modsig *modsig, int pcr,
struct ima_template_desc *template_desc);
-int process_buffer_measurement(struct user_namespace *mnt_userns,
+int process_buffer_measurement(struct mnt_idmap *idmap,
struct inode *inode, const void *buf, int size,
const char *eventname, enum ima_hooks func,
int pcr, const char *func_data,
@@ -285,7 +285,7 @@ void ima_free_template_entry(struct ima_template_entry *entry);
const char *ima_d_path(const struct path *path, char **pathbuf, char *filename);
/* IMA policy related functions */
-int ima_match_policy(struct user_namespace *mnt_userns, struct inode *inode,
+int ima_match_policy(struct mnt_idmap *idmap, struct inode *inode,
const struct cred *cred, u32 secid, enum ima_hooks func,
int mask, int flags, int *pcr,
struct ima_template_desc **template_desc,
@@ -318,7 +318,7 @@ int ima_appraise_measurement(enum ima_hooks func,
struct file *file, const unsigned char *filename,
struct evm_ima_xattr_data *xattr_value,
int xattr_len, const struct modsig *modsig);
-int ima_must_appraise(struct user_namespace *mnt_userns, struct inode *inode,
+int ima_must_appraise(struct mnt_idmap *idmap, struct inode *inode,
int mask, enum ima_hooks func);
void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file);
enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint,
@@ -346,7 +346,7 @@ static inline int ima_appraise_measurement(enum ima_hooks func,
return INTEGRITY_UNKNOWN;
}
-static inline int ima_must_appraise(struct user_namespace *mnt_userns,
+static inline int ima_must_appraise(struct mnt_idmap *idmap,
struct inode *inode, int mask,
enum ima_hooks func)
{
diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c
index c1e76282b5ee..9345fd66f5b8 100644
--- a/security/integrity/ima/ima_api.c
+++ b/security/integrity/ima/ima_api.c
@@ -163,7 +163,7 @@ err_out:
/**
* ima_get_action - appraise & measure decision based on policy.
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @inode: pointer to the inode associated with the object being validated
* @cred: pointer to credentials structure to validate
* @secid: secid of the task being validated
@@ -186,7 +186,7 @@ err_out:
* Returns IMA_MEASURE, IMA_APPRAISE mask.
*
*/
-int ima_get_action(struct user_namespace *mnt_userns, struct inode *inode,
+int ima_get_action(struct mnt_idmap *idmap, struct inode *inode,
const struct cred *cred, u32 secid, int mask,
enum ima_hooks func, int *pcr,
struct ima_template_desc **template_desc,
@@ -196,7 +196,7 @@ int ima_get_action(struct user_namespace *mnt_userns, struct inode *inode,
flags &= ima_policy_flag;
- return ima_match_policy(mnt_userns, inode, cred, secid, func, mask,
+ return ima_match_policy(idmap, inode, cred, secid, func, mask,
flags, pcr, template_desc, func_data,
allowed_algos);
}
diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c
index ee6f7e237f2e..555342d337f9 100644
--- a/security/integrity/ima/ima_appraise.c
+++ b/security/integrity/ima/ima_appraise.c
@@ -70,7 +70,7 @@ bool is_ima_appraise_enabled(void)
*
* Return 1 to appraise or hash
*/
-int ima_must_appraise(struct user_namespace *mnt_userns, struct inode *inode,
+int ima_must_appraise(struct mnt_idmap *idmap, struct inode *inode,
int mask, enum ima_hooks func)
{
u32 secid;
@@ -79,7 +79,7 @@ int ima_must_appraise(struct user_namespace *mnt_userns, struct inode *inode,
return 0;
security_current_getsecid_subj(&secid);
- return ima_match_policy(mnt_userns, inode, current_cred(), secid,
+ return ima_match_policy(idmap, inode, current_cred(), secid,
func, mask, IMA_APPRAISE | IMA_HASH, NULL,
NULL, NULL, NULL);
}
@@ -98,7 +98,7 @@ static int ima_fix_xattr(struct dentry *dentry,
iint->ima_hash->xattr.ng.type = IMA_XATTR_DIGEST_NG;
iint->ima_hash->xattr.ng.algo = algo;
}
- rc = __vfs_setxattr_noperm(&init_user_ns, dentry, XATTR_NAME_IMA,
+ rc = __vfs_setxattr_noperm(&nop_mnt_idmap, dentry, XATTR_NAME_IMA,
&iint->ima_hash->xattr.data[offset],
(sizeof(iint->ima_hash->xattr) - offset) +
iint->ima_hash->length, 0);
@@ -225,7 +225,7 @@ int ima_read_xattr(struct dentry *dentry,
{
int ret;
- ret = vfs_getxattr_alloc(&init_user_ns, dentry, XATTR_NAME_IMA,
+ ret = vfs_getxattr_alloc(&nop_mnt_idmap, dentry, XATTR_NAME_IMA,
(char **)xattr_value, xattr_len, GFP_NOFS);
if (ret == -EOPNOTSUPP)
ret = 0;
@@ -456,7 +456,7 @@ int ima_check_blacklist(struct integrity_iint_cache *iint,
rc = is_binary_blacklisted(digest, digestsize);
if ((rc == -EPERM) && (iint->flags & IMA_MEASURE))
- process_buffer_measurement(&init_user_ns, NULL, digest, digestsize,
+ process_buffer_measurement(&nop_mnt_idmap, NULL, digest, digestsize,
"blacklisted-hash", NONE,
pcr, NULL, false, NULL, 0);
}
@@ -622,7 +622,7 @@ void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file)
/**
* ima_inode_post_setattr - reflect file metadata changes
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @dentry: pointer to the affected dentry
*
* Changes to a dentry's metadata might result in needing to appraise.
@@ -630,7 +630,7 @@ void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file)
* This function is called from notify_change(), which expects the caller
* to lock the inode's i_mutex.
*/
-void ima_inode_post_setattr(struct user_namespace *mnt_userns,
+void ima_inode_post_setattr(struct mnt_idmap *idmap,
struct dentry *dentry)
{
struct inode *inode = d_backing_inode(dentry);
@@ -641,7 +641,7 @@ void ima_inode_post_setattr(struct user_namespace *mnt_userns,
|| !(inode->i_opflags & IOP_XATTR))
return;
- action = ima_must_appraise(mnt_userns, inode, MAY_ACCESS, POST_SETATTR);
+ action = ima_must_appraise(idmap, inode, MAY_ACCESS, POST_SETATTR);
iint = integrity_iint_find(inode);
if (iint) {
set_bit(IMA_CHANGE_ATTR, &iint->atomic_flags);
@@ -774,7 +774,7 @@ int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name,
return result;
}
-int ima_inode_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+int ima_inode_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
const char *acl_name, struct posix_acl *kacl)
{
if (evm_revalidate_status(acl_name))
diff --git a/security/integrity/ima/ima_asymmetric_keys.c b/security/integrity/ima/ima_asymmetric_keys.c
index f6aa0b47a772..caacfe6860b1 100644
--- a/security/integrity/ima/ima_asymmetric_keys.c
+++ b/security/integrity/ima/ima_asymmetric_keys.c
@@ -60,7 +60,7 @@ void ima_post_key_create_or_update(struct key *keyring, struct key *key,
* if the IMA policy is configured to measure a key linked
* to the given keyring.
*/
- process_buffer_measurement(&init_user_ns, NULL, payload, payload_len,
+ process_buffer_measurement(&nop_mnt_idmap, NULL, payload, payload_len,
keyring->description, KEY_CHECK, 0,
keyring->description, false, NULL, 0);
}
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 377300973e6c..358578267fea 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -224,7 +224,7 @@ static int process_measurement(struct file *file, const struct cred *cred,
* bitmask based on the appraise/audit/measurement policy.
* Included is the appraise submask.
*/
- action = ima_get_action(file_mnt_user_ns(file), inode, cred, secid,
+ action = ima_get_action(file_mnt_idmap(file), inode, cred, secid,
mask, func, &pcr, &template_desc, NULL,
&allowed_algos);
violation_check = ((func == FILE_CHECK || func == MMAP_CHECK) &&
@@ -451,7 +451,7 @@ int ima_file_mprotect(struct vm_area_struct *vma, unsigned long prot)
security_current_getsecid_subj(&secid);
inode = file_inode(vma->vm_file);
- action = ima_get_action(file_mnt_user_ns(vma->vm_file), inode,
+ action = ima_get_action(file_mnt_idmap(vma->vm_file), inode,
current_cred(), secid, MAY_EXEC, MMAP_CHECK,
&pcr, &template, NULL, NULL);
@@ -638,14 +638,14 @@ EXPORT_SYMBOL_GPL(ima_inode_hash);
/**
* ima_post_create_tmpfile - mark newly created tmpfile as new
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @inode: inode of the newly created tmpfile
*
* No measuring, appraising or auditing of newly created tmpfiles is needed.
* Skip calling process_measurement(), but indicate which newly, created
* tmpfiles are in policy.
*/
-void ima_post_create_tmpfile(struct user_namespace *mnt_userns,
+void ima_post_create_tmpfile(struct mnt_idmap *idmap,
struct inode *inode)
{
struct integrity_iint_cache *iint;
@@ -654,7 +654,7 @@ void ima_post_create_tmpfile(struct user_namespace *mnt_userns,
if (!ima_policy_flag || !S_ISREG(inode->i_mode))
return;
- must_appraise = ima_must_appraise(mnt_userns, inode, MAY_ACCESS,
+ must_appraise = ima_must_appraise(idmap, inode, MAY_ACCESS,
FILE_CHECK);
if (!must_appraise)
return;
@@ -671,13 +671,13 @@ void ima_post_create_tmpfile(struct user_namespace *mnt_userns,
/**
* ima_post_path_mknod - mark as a new inode
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @dentry: newly created dentry
*
* Mark files created via the mknodat syscall as new, so that the
* file data can be written later.
*/
-void ima_post_path_mknod(struct user_namespace *mnt_userns,
+void ima_post_path_mknod(struct mnt_idmap *idmap,
struct dentry *dentry)
{
struct integrity_iint_cache *iint;
@@ -687,7 +687,7 @@ void ima_post_path_mknod(struct user_namespace *mnt_userns,
if (!ima_policy_flag || !S_ISREG(inode->i_mode))
return;
- must_appraise = ima_must_appraise(mnt_userns, inode, MAY_ACCESS,
+ must_appraise = ima_must_appraise(idmap, inode, MAY_ACCESS,
FILE_CHECK);
if (!must_appraise)
return;
@@ -869,7 +869,7 @@ int ima_post_load_data(char *buf, loff_t size,
/**
* process_buffer_measurement - Measure the buffer or the buffer data hash
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @inode: inode associated with the object being measured (NULL for KEY_CHECK)
* @buf: pointer to the buffer that needs to be added to the log.
* @size: size of buffer(in bytes).
@@ -887,7 +887,7 @@ int ima_post_load_data(char *buf, loff_t size,
* has been written to the passed location but not added to a measurement entry,
* a negative value otherwise.
*/
-int process_buffer_measurement(struct user_namespace *mnt_userns,
+int process_buffer_measurement(struct mnt_idmap *idmap,
struct inode *inode, const void *buf, int size,
const char *eventname, enum ima_hooks func,
int pcr, const char *func_data,
@@ -931,7 +931,7 @@ int process_buffer_measurement(struct user_namespace *mnt_userns,
*/
if (func) {
security_current_getsecid_subj(&secid);
- action = ima_get_action(mnt_userns, inode, current_cred(),
+ action = ima_get_action(idmap, inode, current_cred(),
secid, 0, func, &pcr, &template,
func_data, NULL);
if (!(action & IMA_MEASURE) && !digest)
@@ -1011,7 +1011,7 @@ void ima_kexec_cmdline(int kernel_fd, const void *buf, int size)
if (!f.file)
return;
- process_buffer_measurement(file_mnt_user_ns(f.file), file_inode(f.file),
+ process_buffer_measurement(file_mnt_idmap(f.file), file_inode(f.file),
buf, size, "kexec-cmdline", KEXEC_CMDLINE, 0,
NULL, false, NULL, 0);
fdput(f);
@@ -1044,7 +1044,7 @@ int ima_measure_critical_data(const char *event_label,
if (!event_name || !event_label || !buf || !buf_len)
return -ENOPARAM;
- return process_buffer_measurement(&init_user_ns, NULL, buf, buf_len,
+ return process_buffer_measurement(&nop_mnt_idmap, NULL, buf, buf_len,
event_name, CRITICAL_DATA, 0,
event_label, hash, digest,
digest_len);
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index 6a68ec270822..fc128a6b4abe 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -552,7 +552,7 @@ static bool ima_match_rule_data(struct ima_rule_entry *rule,
/**
* ima_match_rules - determine whether an inode matches the policy rule.
* @rule: a pointer to a rule
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @inode: a pointer to an inode
* @cred: a pointer to a credentials structure for user validation
* @secid: the secid of the task to be validated
@@ -563,7 +563,7 @@ static bool ima_match_rule_data(struct ima_rule_entry *rule,
* Returns true on rule match, false on failure.
*/
static bool ima_match_rules(struct ima_rule_entry *rule,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct inode *inode, const struct cred *cred,
u32 secid, enum ima_hooks func, int mask,
const char *func_data)
@@ -624,11 +624,11 @@ static bool ima_match_rules(struct ima_rule_entry *rule,
return false;
}
if ((rule->flags & IMA_FOWNER) &&
- !rule->fowner_op(i_uid_into_vfsuid(mnt_userns, inode),
+ !rule->fowner_op(i_uid_into_vfsuid(idmap, inode),
rule->fowner))
return false;
if ((rule->flags & IMA_FGROUP) &&
- !rule->fgroup_op(i_gid_into_vfsgid(mnt_userns, inode),
+ !rule->fgroup_op(i_gid_into_vfsgid(idmap, inode),
rule->fgroup))
return false;
for (i = 0; i < MAX_LSM_RULES; i++) {
@@ -713,7 +713,7 @@ static int get_subaction(struct ima_rule_entry *rule, enum ima_hooks func)
/**
* ima_match_policy - decision based on LSM and other conditions
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @inode: pointer to an inode for which the policy decision is being made
* @cred: pointer to a credentials structure for which the policy decision is
* being made
@@ -732,7 +732,7 @@ static int get_subaction(struct ima_rule_entry *rule, enum ima_hooks func)
* list when walking it. Reads are many orders of magnitude more numerous
* than writes so ima_match_policy() is classical RCU candidate.
*/
-int ima_match_policy(struct user_namespace *mnt_userns, struct inode *inode,
+int ima_match_policy(struct mnt_idmap *idmap, struct inode *inode,
const struct cred *cred, u32 secid, enum ima_hooks func,
int mask, int flags, int *pcr,
struct ima_template_desc **template_desc,
@@ -752,7 +752,7 @@ int ima_match_policy(struct user_namespace *mnt_userns, struct inode *inode,
if (!(entry->action & actmask))
continue;
- if (!ima_match_rules(entry, mnt_userns, inode, cred, secid,
+ if (!ima_match_rules(entry, idmap, inode, cred, secid,
func, mask, func_data))
continue;
diff --git a/security/integrity/ima/ima_queue_keys.c b/security/integrity/ima/ima_queue_keys.c
index 93056c03bf5a..4f0aea155bf9 100644
--- a/security/integrity/ima/ima_queue_keys.c
+++ b/security/integrity/ima/ima_queue_keys.c
@@ -159,7 +159,7 @@ void ima_process_queued_keys(void)
list_for_each_entry_safe(entry, tmp, &ima_keys, list) {
if (!timer_expired)
- process_buffer_measurement(&init_user_ns, NULL,
+ process_buffer_measurement(&nop_mnt_idmap, NULL,
entry->payload,
entry->payload_len,
entry->keyring_name,
diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c
index 4564faae7d67..6cd0add524cd 100644
--- a/security/integrity/ima/ima_template_lib.c
+++ b/security/integrity/ima/ima_template_lib.c
@@ -598,7 +598,7 @@ int ima_eventevmsig_init(struct ima_event_data *event_data,
if (!event_data->file)
return 0;
- rc = vfs_getxattr_alloc(&init_user_ns, file_dentry(event_data->file),
+ rc = vfs_getxattr_alloc(&nop_mnt_idmap, file_dentry(event_data->file),
XATTR_NAME_EVM, (char **)&xattr_data, 0,
GFP_NOFS);
if (rc <= 0 || xattr_data->type != EVM_XATTR_PORTABLE_DIGSIG) {
diff --git a/security/keys/key.c b/security/keys/key.c
index c45afdd1dfbb..5c0c7df833f8 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -788,38 +788,18 @@ error:
goto out;
}
-/**
- * key_create_or_update - Update or create and instantiate a key.
- * @keyring_ref: A pointer to the destination keyring with possession flag.
- * @type: The type of key.
- * @description: The searchable description for the key.
- * @payload: The data to use to instantiate or update the key.
- * @plen: The length of @payload.
- * @perm: The permissions mask for a new key.
- * @flags: The quota flags for a new key.
- *
- * Search the destination keyring for a key of the same description and if one
- * is found, update it, otherwise create and instantiate a new one and create a
- * link to it from that keyring.
- *
- * If perm is KEY_PERM_UNDEF then an appropriate key permissions mask will be
- * concocted.
- *
- * Returns a pointer to the new key if successful, -ENODEV if the key type
- * wasn't available, -ENOTDIR if the keyring wasn't a keyring, -EACCES if the
- * caller isn't permitted to modify the keyring or the LSM did not permit
- * creation of the key.
- *
- * On success, the possession flag from the keyring ref will be tacked on to
- * the key ref before it is returned.
+/*
+ * Create or potentially update a key. The combined logic behind
+ * key_create_or_update() and key_create()
*/
-key_ref_t key_create_or_update(key_ref_t keyring_ref,
- const char *type,
- const char *description,
- const void *payload,
- size_t plen,
- key_perm_t perm,
- unsigned long flags)
+static key_ref_t __key_create_or_update(key_ref_t keyring_ref,
+ const char *type,
+ const char *description,
+ const void *payload,
+ size_t plen,
+ key_perm_t perm,
+ unsigned long flags,
+ bool allow_update)
{
struct keyring_index_key index_key = {
.description = description,
@@ -906,14 +886,23 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
goto error_link_end;
}
- /* if it's possible to update this type of key, search for an existing
- * key of the same type and description in the destination keyring and
- * update that instead if possible
+ /* if it's requested and possible to update this type of key, search
+ * for an existing key of the same type and description in the
+ * destination keyring and update that instead if possible
*/
- if (index_key.type->update) {
+ if (allow_update) {
+ if (index_key.type->update) {
+ key_ref = find_key_to_update(keyring_ref, &index_key);
+ if (key_ref)
+ goto found_matching_key;
+ }
+ } else {
key_ref = find_key_to_update(keyring_ref, &index_key);
- if (key_ref)
- goto found_matching_key;
+ if (key_ref) {
+ key_ref_put(key_ref);
+ key_ref = ERR_PTR(-EEXIST);
+ goto error_link_end;
+ }
}
/* if the client doesn't provide, decide on the permissions we want */
@@ -985,9 +974,83 @@ error:
goto error_free_prep;
}
+
+/**
+ * key_create_or_update - Update or create and instantiate a key.
+ * @keyring_ref: A pointer to the destination keyring with possession flag.
+ * @type: The type of key.
+ * @description: The searchable description for the key.
+ * @payload: The data to use to instantiate or update the key.
+ * @plen: The length of @payload.
+ * @perm: The permissions mask for a new key.
+ * @flags: The quota flags for a new key.
+ *
+ * Search the destination keyring for a key of the same description and if one
+ * is found, update it, otherwise create and instantiate a new one and create a
+ * link to it from that keyring.
+ *
+ * If perm is KEY_PERM_UNDEF then an appropriate key permissions mask will be
+ * concocted.
+ *
+ * Returns a pointer to the new key if successful, -ENODEV if the key type
+ * wasn't available, -ENOTDIR if the keyring wasn't a keyring, -EACCES if the
+ * caller isn't permitted to modify the keyring or the LSM did not permit
+ * creation of the key.
+ *
+ * On success, the possession flag from the keyring ref will be tacked on to
+ * the key ref before it is returned.
+ */
+key_ref_t key_create_or_update(key_ref_t keyring_ref,
+ const char *type,
+ const char *description,
+ const void *payload,
+ size_t plen,
+ key_perm_t perm,
+ unsigned long flags)
+{
+ return __key_create_or_update(keyring_ref, type, description, payload,
+ plen, perm, flags, true);
+}
EXPORT_SYMBOL(key_create_or_update);
/**
+ * key_create - Create and instantiate a key.
+ * @keyring_ref: A pointer to the destination keyring with possession flag.
+ * @type: The type of key.
+ * @description: The searchable description for the key.
+ * @payload: The data to use to instantiate or update the key.
+ * @plen: The length of @payload.
+ * @perm: The permissions mask for a new key.
+ * @flags: The quota flags for a new key.
+ *
+ * Create and instantiate a new key and link to it from the destination keyring.
+ *
+ * If perm is KEY_PERM_UNDEF then an appropriate key permissions mask will be
+ * concocted.
+ *
+ * Returns a pointer to the new key if successful, -EEXIST if a key with the
+ * same description already exists, -ENODEV if the key type wasn't available,
+ * -ENOTDIR if the keyring wasn't a keyring, -EACCES if the caller isn't
+ * permitted to modify the keyring or the LSM did not permit creation of the
+ * key.
+ *
+ * On success, the possession flag from the keyring ref will be tacked on to
+ * the key ref before it is returned.
+ */
+key_ref_t key_create(key_ref_t keyring_ref,
+ const char *type,
+ const char *description,
+ const void *payload,
+ size_t plen,
+ key_perm_t perm,
+ unsigned long flags)
+{
+ return __key_create_or_update(keyring_ref, type, description, payload,
+ plen, perm, flags, false);
+}
+EXPORT_SYMBOL(key_create);
+
+/**
* key_update - Update a key's contents.
* @key_ref: The pointer (plus possession flag) to the key.
* @payload: The data to be used to update the key.
diff --git a/security/security.c b/security/security.c
index d1571900a8c7..4e1150c44ab7 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1354,7 +1354,7 @@ int security_inode_permission(struct inode *inode, int mask)
return call_int_hook(inode_permission, 0, inode, mask);
}
-int security_inode_setattr(struct user_namespace *mnt_userns,
+int security_inode_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *attr)
{
int ret;
@@ -1364,7 +1364,7 @@ int security_inode_setattr(struct user_namespace *mnt_userns,
ret = call_int_hook(inode_setattr, 0, dentry, attr);
if (ret)
return ret;
- return evm_inode_setattr(mnt_userns, dentry, attr);
+ return evm_inode_setattr(idmap, dentry, attr);
}
EXPORT_SYMBOL_GPL(security_inode_setattr);
@@ -1375,7 +1375,7 @@ int security_inode_getattr(const struct path *path)
return call_int_hook(inode_getattr, 0, path);
}
-int security_inode_setxattr(struct user_namespace *mnt_userns,
+int security_inode_setxattr(struct mnt_idmap *idmap,
struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
@@ -1387,7 +1387,7 @@ int security_inode_setxattr(struct user_namespace *mnt_userns,
* SELinux and Smack integrate the cap call,
* so assume that all LSMs supplying this call do so.
*/
- ret = call_int_hook(inode_setxattr, 1, mnt_userns, dentry, name, value,
+ ret = call_int_hook(inode_setxattr, 1, idmap, dentry, name, value,
size, flags);
if (ret == 1)
@@ -1397,10 +1397,10 @@ int security_inode_setxattr(struct user_namespace *mnt_userns,
ret = ima_inode_setxattr(dentry, name, value, size);
if (ret)
return ret;
- return evm_inode_setxattr(mnt_userns, dentry, name, value, size);
+ return evm_inode_setxattr(idmap, dentry, name, value, size);
}
-int security_inode_set_acl(struct user_namespace *mnt_userns,
+int security_inode_set_acl(struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name,
struct posix_acl *kacl)
{
@@ -1408,38 +1408,38 @@ int security_inode_set_acl(struct user_namespace *mnt_userns,
if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
return 0;
- ret = call_int_hook(inode_set_acl, 0, mnt_userns, dentry, acl_name,
+ ret = call_int_hook(inode_set_acl, 0, idmap, dentry, acl_name,
kacl);
if (ret)
return ret;
- ret = ima_inode_set_acl(mnt_userns, dentry, acl_name, kacl);
+ ret = ima_inode_set_acl(idmap, dentry, acl_name, kacl);
if (ret)
return ret;
- return evm_inode_set_acl(mnt_userns, dentry, acl_name, kacl);
+ return evm_inode_set_acl(idmap, dentry, acl_name, kacl);
}
-int security_inode_get_acl(struct user_namespace *mnt_userns,
+int security_inode_get_acl(struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name)
{
if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
return 0;
- return call_int_hook(inode_get_acl, 0, mnt_userns, dentry, acl_name);
+ return call_int_hook(inode_get_acl, 0, idmap, dentry, acl_name);
}
-int security_inode_remove_acl(struct user_namespace *mnt_userns,
+int security_inode_remove_acl(struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name)
{
int ret;
if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
return 0;
- ret = call_int_hook(inode_remove_acl, 0, mnt_userns, dentry, acl_name);
+ ret = call_int_hook(inode_remove_acl, 0, idmap, dentry, acl_name);
if (ret)
return ret;
- ret = ima_inode_remove_acl(mnt_userns, dentry, acl_name);
+ ret = ima_inode_remove_acl(idmap, dentry, acl_name);
if (ret)
return ret;
- return evm_inode_remove_acl(mnt_userns, dentry, acl_name);
+ return evm_inode_remove_acl(idmap, dentry, acl_name);
}
void security_inode_post_setxattr(struct dentry *dentry, const char *name,
@@ -1465,7 +1465,7 @@ int security_inode_listxattr(struct dentry *dentry)
return call_int_hook(inode_listxattr, 0, dentry);
}
-int security_inode_removexattr(struct user_namespace *mnt_userns,
+int security_inode_removexattr(struct mnt_idmap *idmap,
struct dentry *dentry, const char *name)
{
int ret;
@@ -1476,15 +1476,15 @@ int security_inode_removexattr(struct user_namespace *mnt_userns,
* SELinux and Smack integrate the cap call,
* so assume that all LSMs supplying this call do so.
*/
- ret = call_int_hook(inode_removexattr, 1, mnt_userns, dentry, name);
+ ret = call_int_hook(inode_removexattr, 1, idmap, dentry, name);
if (ret == 1)
- ret = cap_inode_removexattr(mnt_userns, dentry, name);
+ ret = cap_inode_removexattr(idmap, dentry, name);
if (ret)
return ret;
ret = ima_inode_removexattr(dentry, name);
if (ret)
return ret;
- return evm_inode_removexattr(mnt_userns, dentry, name);
+ return evm_inode_removexattr(idmap, dentry, name);
}
int security_inode_need_killpriv(struct dentry *dentry)
@@ -1492,13 +1492,13 @@ int security_inode_need_killpriv(struct dentry *dentry)
return call_int_hook(inode_need_killpriv, 0, dentry);
}
-int security_inode_killpriv(struct user_namespace *mnt_userns,
+int security_inode_killpriv(struct mnt_idmap *idmap,
struct dentry *dentry)
{
- return call_int_hook(inode_killpriv, 0, mnt_userns, dentry);
+ return call_int_hook(inode_killpriv, 0, idmap, dentry);
}
-int security_inode_getsecurity(struct user_namespace *mnt_userns,
+int security_inode_getsecurity(struct mnt_idmap *idmap,
struct inode *inode, const char *name,
void **buffer, bool alloc)
{
@@ -1511,7 +1511,7 @@ int security_inode_getsecurity(struct user_namespace *mnt_userns,
* Only one module will provide an attribute with a given name.
*/
hlist_for_each_entry(hp, &security_hook_heads.inode_getsecurity, list) {
- rc = hp->hook.inode_getsecurity(mnt_userns, inode, name, buffer, alloc);
+ rc = hp->hook.inode_getsecurity(idmap, inode, name, buffer, alloc);
if (rc != LSM_RET_DEFAULT(inode_getsecurity))
return rc;
}
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 3c5be76a9199..9a5bdfc21314 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3145,7 +3145,7 @@ static bool has_cap_mac_admin(bool audit)
return true;
}
-static int selinux_inode_setxattr(struct user_namespace *mnt_userns,
+static int selinux_inode_setxattr(struct mnt_idmap *idmap,
struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
@@ -3167,13 +3167,13 @@ static int selinux_inode_setxattr(struct user_namespace *mnt_userns,
}
if (!selinux_initialized(&selinux_state))
- return (inode_owner_or_capable(mnt_userns, inode) ? 0 : -EPERM);
+ return (inode_owner_or_capable(idmap, inode) ? 0 : -EPERM);
sbsec = selinux_superblock(inode->i_sb);
if (!(sbsec->flags & SBLABEL_MNT))
return -EOPNOTSUPP;
- if (!inode_owner_or_capable(mnt_userns, inode))
+ if (!inode_owner_or_capable(idmap, inode))
return -EPERM;
ad.type = LSM_AUDIT_DATA_DENTRY;
@@ -3240,20 +3240,20 @@ static int selinux_inode_setxattr(struct user_namespace *mnt_userns,
&ad);
}
-static int selinux_inode_set_acl(struct user_namespace *mnt_userns,
+static int selinux_inode_set_acl(struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name,
struct posix_acl *kacl)
{
return dentry_has_perm(current_cred(), dentry, FILE__SETATTR);
}
-static int selinux_inode_get_acl(struct user_namespace *mnt_userns,
+static int selinux_inode_get_acl(struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name)
{
return dentry_has_perm(current_cred(), dentry, FILE__GETATTR);
}
-static int selinux_inode_remove_acl(struct user_namespace *mnt_userns,
+static int selinux_inode_remove_acl(struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name)
{
return dentry_has_perm(current_cred(), dentry, FILE__SETATTR);
@@ -3313,11 +3313,11 @@ static int selinux_inode_listxattr(struct dentry *dentry)
return dentry_has_perm(cred, dentry, FILE__GETATTR);
}
-static int selinux_inode_removexattr(struct user_namespace *mnt_userns,
+static int selinux_inode_removexattr(struct mnt_idmap *idmap,
struct dentry *dentry, const char *name)
{
if (strcmp(name, XATTR_NAME_SELINUX)) {
- int rc = cap_inode_removexattr(mnt_userns, dentry, name);
+ int rc = cap_inode_removexattr(idmap, dentry, name);
if (rc)
return rc;
@@ -3383,7 +3383,7 @@ static int selinux_path_notify(const struct path *path, u64 mask,
*
* Permission check is handled by selinux_inode_getxattr hook.
*/
-static int selinux_inode_getsecurity(struct user_namespace *mnt_userns,
+static int selinux_inode_getsecurity(struct mnt_idmap *idmap,
struct inode *inode, const char *name,
void **buffer, bool alloc)
{
@@ -6588,14 +6588,14 @@ static int selinux_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen
*/
static int selinux_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
{
- return __vfs_setxattr_noperm(&init_user_ns, dentry, XATTR_NAME_SELINUX,
+ return __vfs_setxattr_noperm(&nop_mnt_idmap, dentry, XATTR_NAME_SELINUX,
ctx, ctxlen, 0);
}
static int selinux_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
{
int len = 0;
- len = selinux_inode_getsecurity(&init_user_ns, inode,
+ len = selinux_inode_getsecurity(&nop_mnt_idmap, inode,
XATTR_SELINUX_SUFFIX, ctx, true);
if (len < 0)
return len;
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 9a82a15685d1..cfcbb748da25 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -1207,7 +1207,7 @@ static int smack_inode_getattr(const struct path *path)
/**
* smack_inode_setxattr - Smack check for setting xattrs
- * @mnt_userns: active user namespace
+ * @idmap: idmap of the mount
* @dentry: the object
* @name: name of the attribute
* @value: value of the attribute
@@ -1218,7 +1218,7 @@ static int smack_inode_getattr(const struct path *path)
*
* Returns 0 if access is permitted, an error code otherwise
*/
-static int smack_inode_setxattr(struct user_namespace *mnt_userns,
+static int smack_inode_setxattr(struct mnt_idmap *idmap,
struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
@@ -1334,7 +1334,7 @@ static int smack_inode_getxattr(struct dentry *dentry, const char *name)
/**
* smack_inode_removexattr - Smack check on removexattr
- * @mnt_userns: active user namespace
+ * @idmap: idmap of the mount
* @dentry: the object
* @name: name of the attribute
*
@@ -1342,7 +1342,7 @@ static int smack_inode_getxattr(struct dentry *dentry, const char *name)
*
* Returns 0 if access is permitted, an error code otherwise
*/
-static int smack_inode_removexattr(struct user_namespace *mnt_userns,
+static int smack_inode_removexattr(struct mnt_idmap *idmap,
struct dentry *dentry, const char *name)
{
struct inode_smack *isp;
@@ -1358,7 +1358,7 @@ static int smack_inode_removexattr(struct user_namespace *mnt_userns,
if (!smack_privileged(CAP_MAC_ADMIN))
rc = -EPERM;
} else
- rc = cap_inode_removexattr(mnt_userns, dentry, name);
+ rc = cap_inode_removexattr(idmap, dentry, name);
if (rc != 0)
return rc;
@@ -1394,14 +1394,14 @@ static int smack_inode_removexattr(struct user_namespace *mnt_userns,
/**
* smack_inode_set_acl - Smack check for setting posix acls
- * @mnt_userns: the userns attached to the mnt this request came from
+ * @idmap: idmap of the mnt this request came from
* @dentry: the object
* @acl_name: name of the posix acl
* @kacl: the posix acls
*
* Returns 0 if access is permitted, an error code otherwise
*/
-static int smack_inode_set_acl(struct user_namespace *mnt_userns,
+static int smack_inode_set_acl(struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name,
struct posix_acl *kacl)
{
@@ -1418,13 +1418,13 @@ static int smack_inode_set_acl(struct user_namespace *mnt_userns,
/**
* smack_inode_get_acl - Smack check for getting posix acls
- * @mnt_userns: the userns attached to the mnt this request came from
+ * @idmap: idmap of the mnt this request came from
* @dentry: the object
* @acl_name: name of the posix acl
*
* Returns 0 if access is permitted, an error code otherwise
*/
-static int smack_inode_get_acl(struct user_namespace *mnt_userns,
+static int smack_inode_get_acl(struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name)
{
struct smk_audit_info ad;
@@ -1440,13 +1440,13 @@ static int smack_inode_get_acl(struct user_namespace *mnt_userns,
/**
* smack_inode_remove_acl - Smack check for getting posix acls
- * @mnt_userns: the userns attached to the mnt this request came from
+ * @idmap: idmap of the mnt this request came from
* @dentry: the object
* @acl_name: name of the posix acl
*
* Returns 0 if access is permitted, an error code otherwise
*/
-static int smack_inode_remove_acl(struct user_namespace *mnt_userns,
+static int smack_inode_remove_acl(struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name)
{
struct smk_audit_info ad;
@@ -1462,7 +1462,7 @@ static int smack_inode_remove_acl(struct user_namespace *mnt_userns,
/**
* smack_inode_getsecurity - get smack xattrs
- * @mnt_userns: active user namespace
+ * @idmap: idmap of the mount
* @inode: the object
* @name: attribute name
* @buffer: where to put the result
@@ -1470,7 +1470,7 @@ static int smack_inode_remove_acl(struct user_namespace *mnt_userns,
*
* Returns the size of the attribute or an error code
*/
-static int smack_inode_getsecurity(struct user_namespace *mnt_userns,
+static int smack_inode_getsecurity(struct mnt_idmap *idmap,
struct inode *inode, const char *name,
void **buffer, bool alloc)
{
@@ -3507,7 +3507,7 @@ static void smack_d_instantiate(struct dentry *opt_dentry, struct inode *inode)
*/
if (isp->smk_flags & SMK_INODE_CHANGED) {
isp->smk_flags &= ~SMK_INODE_CHANGED;
- rc = __vfs_setxattr(&init_user_ns, dp, inode,
+ rc = __vfs_setxattr(&nop_mnt_idmap, dp, inode,
XATTR_NAME_SMACKTRANSMUTE,
TRANS_TRUE, TRANS_TRUE_SIZE,
0);
@@ -4686,7 +4686,7 @@ static int smack_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen)
static int smack_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
{
- return __vfs_setxattr_noperm(&init_user_ns, dentry, XATTR_NAME_SMACK,
+ return __vfs_setxattr_noperm(&nop_mnt_idmap, dentry, XATTR_NAME_SMACK,
ctx, ctxlen, 0);
}
diff --git a/security/tomoyo/Kconfig b/security/tomoyo/Kconfig
index b9f867100a9f..fad75be5f381 100644
--- a/security/tomoyo/Kconfig
+++ b/security/tomoyo/Kconfig
@@ -6,13 +6,11 @@ config SECURITY_TOMOYO
select SECURITYFS
select SECURITY_PATH
select SECURITY_NETWORK
- select SRCU
- select BUILD_BIN2C
default n
help
This selects TOMOYO Linux, pathname-based access control.
Required userspace tools and further information may be
- found at <http://tomoyo.sourceforge.jp/>.
+ found at <https://tomoyo.osdn.jp/>.
If you are unsure how to answer this question, answer N.
config SECURITY_TOMOYO_MAX_ACCEPT_ENTRY
diff --git a/security/tomoyo/Makefile b/security/tomoyo/Makefile
index cca5a3012fee..884ff155edc3 100644
--- a/security/tomoyo/Makefile
+++ b/security/tomoyo/Makefile
@@ -2,15 +2,18 @@
obj-y = audit.o common.o condition.o domain.o environ.o file.o gc.o group.o load_policy.o memory.o mount.o network.o realpath.o securityfs_if.o tomoyo.o util.o
targets += builtin-policy.h
-define do_policy
-echo "static char tomoyo_builtin_$(1)[] __initdata ="; \
-$(objtree)/scripts/bin2c <$(firstword $(wildcard $(obj)/policy/$(1).conf $(srctree)/$(src)/policy/$(1).conf.default) /dev/null); \
-echo ";"
-endef
-quiet_cmd_policy = POLICY $@
- cmd_policy = ($(call do_policy,profile); $(call do_policy,exception_policy); $(call do_policy,domain_policy); $(call do_policy,manager); $(call do_policy,stat)) >$@
-$(obj)/builtin-policy.h: $(wildcard $(obj)/policy/*.conf $(src)/policy/*.conf.default) FORCE
+quiet_cmd_policy = POLICY $@
+ cmd_policy = { \
+ $(foreach x, profile exception_policy domain_policy manager stat, \
+ printf 'static char tomoyo_builtin_$x[] __initdata =\n'; \
+ sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/\t"\1\\n"/' -- $(firstword $(filter %/$x.conf %/$x.conf.default, $^) /dev/null); \
+ printf '\t"";\n';) \
+ } > $@
+
+$(obj)/builtin-policy.h: $(wildcard $(obj)/policy/*.conf $(srctree)/$(src)/policy/*.conf.default) FORCE
$(call if_changed,policy)
+ifndef CONFIG_SECURITY_TOMOYO_INSECURE_BUILTIN_SETTING
$(obj)/common.o: $(obj)/builtin-policy.h
+endif
diff --git a/sound/core/control.c b/sound/core/control.c
index 50e7ba66f187..82aa1af1d1d8 100644
--- a/sound/core/control.c
+++ b/sound/core/control.c
@@ -1203,14 +1203,19 @@ static int snd_ctl_elem_read(struct snd_card *card,
const u32 pattern = 0xdeadbeef;
int ret;
+ down_read(&card->controls_rwsem);
kctl = snd_ctl_find_id(card, &control->id);
- if (kctl == NULL)
- return -ENOENT;
+ if (kctl == NULL) {
+ ret = -ENOENT;
+ goto unlock;
+ }
index_offset = snd_ctl_get_ioff(kctl, &control->id);
vd = &kctl->vd[index_offset];
- if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_READ) || kctl->get == NULL)
- return -EPERM;
+ if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_READ) || kctl->get == NULL) {
+ ret = -EPERM;
+ goto unlock;
+ }
snd_ctl_build_ioff(&control->id, kctl, index_offset);
@@ -1220,7 +1225,7 @@ static int snd_ctl_elem_read(struct snd_card *card,
info.id = control->id;
ret = __snd_ctl_elem_info(card, kctl, &info, NULL);
if (ret < 0)
- return ret;
+ goto unlock;
#endif
if (!snd_ctl_skip_validation(&info))
@@ -1230,7 +1235,7 @@ static int snd_ctl_elem_read(struct snd_card *card,
ret = kctl->get(kctl, control);
snd_power_unref(card);
if (ret < 0)
- return ret;
+ goto unlock;
if (!snd_ctl_skip_validation(&info) &&
sanity_check_elem_value(card, control, &info, pattern) < 0) {
dev_err(card->dev,
@@ -1238,8 +1243,11 @@ static int snd_ctl_elem_read(struct snd_card *card,
control->id.iface, control->id.device,
control->id.subdevice, control->id.name,
control->id.index);
- return -EINVAL;
+ ret = -EINVAL;
+ goto unlock;
}
+unlock:
+ up_read(&card->controls_rwsem);
return ret;
}
@@ -1253,9 +1261,7 @@ static int snd_ctl_elem_read_user(struct snd_card *card,
if (IS_ERR(control))
return PTR_ERR(control);
- down_read(&card->controls_rwsem);
result = snd_ctl_elem_read(card, control);
- up_read(&card->controls_rwsem);
if (result < 0)
goto error;
diff --git a/sound/core/control_led.c b/sound/core/control_led.c
index f975cc85772b..3cadd40100f3 100644
--- a/sound/core/control_led.c
+++ b/sound/core/control_led.c
@@ -530,12 +530,11 @@ static ssize_t set_led_id(struct snd_ctl_led_card *led_card, const char *buf, si
bool attach)
{
char buf2[256], *s, *os;
- size_t len = max(sizeof(s) - 1, count);
struct snd_ctl_elem_id id;
int err;
- strncpy(buf2, buf, len);
- buf2[len] = '\0';
+ if (strscpy(buf2, buf, sizeof(buf2)) < 0)
+ return -E2BIG;
memset(&id, 0, sizeof(id));
id.iface = SNDRV_CTL_ELEM_IFACE_MIXER;
s = buf2;
diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c
index 81025f50a542..f901504b5afc 100644
--- a/sound/core/memalloc.c
+++ b/sound/core/memalloc.c
@@ -541,16 +541,15 @@ static void *snd_dma_noncontig_alloc(struct snd_dma_buffer *dmab, size_t size)
struct sg_table *sgt;
void *p;
+#ifdef CONFIG_SND_DMA_SGBUF
+ if (cpu_feature_enabled(X86_FEATURE_XENPV))
+ return snd_dma_sg_fallback_alloc(dmab, size);
+#endif
sgt = dma_alloc_noncontiguous(dmab->dev.dev, size, dmab->dev.dir,
DEFAULT_GFP, 0);
#ifdef CONFIG_SND_DMA_SGBUF
- if (!sgt && !get_dma_ops(dmab->dev.dev)) {
- if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG)
- dmab->dev.type = SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK;
- else
- dmab->dev.type = SNDRV_DMA_TYPE_DEV_SG_FALLBACK;
+ if (!sgt && !get_dma_ops(dmab->dev.dev))
return snd_dma_sg_fallback_alloc(dmab, size);
- }
#endif
if (!sgt)
return NULL;
@@ -717,19 +716,38 @@ static const struct snd_malloc_ops snd_dma_sg_wc_ops = {
/* Fallback SG-buffer allocations for x86 */
struct snd_dma_sg_fallback {
+ bool use_dma_alloc_coherent;
size_t count;
struct page **pages;
+ /* DMA address array; the first page contains #pages in ~PAGE_MASK */
+ dma_addr_t *addrs;
};
static void __snd_dma_sg_fallback_free(struct snd_dma_buffer *dmab,
struct snd_dma_sg_fallback *sgbuf)
{
- bool wc = dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK;
- size_t i;
-
- for (i = 0; i < sgbuf->count && sgbuf->pages[i]; i++)
- do_free_pages(page_address(sgbuf->pages[i]), PAGE_SIZE, wc);
+ size_t i, size;
+
+ if (sgbuf->pages && sgbuf->addrs) {
+ i = 0;
+ while (i < sgbuf->count) {
+ if (!sgbuf->pages[i] || !sgbuf->addrs[i])
+ break;
+ size = sgbuf->addrs[i] & ~PAGE_MASK;
+ if (WARN_ON(!size))
+ break;
+ if (sgbuf->use_dma_alloc_coherent)
+ dma_free_coherent(dmab->dev.dev, size << PAGE_SHIFT,
+ page_address(sgbuf->pages[i]),
+ sgbuf->addrs[i] & PAGE_MASK);
+ else
+ do_free_pages(page_address(sgbuf->pages[i]),
+ size << PAGE_SHIFT, false);
+ i += size;
+ }
+ }
kvfree(sgbuf->pages);
+ kvfree(sgbuf->addrs);
kfree(sgbuf);
}
@@ -738,24 +756,36 @@ static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size)
struct snd_dma_sg_fallback *sgbuf;
struct page **pagep, *curp;
size_t chunk, npages;
+ dma_addr_t *addrp;
dma_addr_t addr;
void *p;
- bool wc = dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK;
+
+ /* correct the type */
+ if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_SG)
+ dmab->dev.type = SNDRV_DMA_TYPE_DEV_SG_FALLBACK;
+ else if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG)
+ dmab->dev.type = SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK;
sgbuf = kzalloc(sizeof(*sgbuf), GFP_KERNEL);
if (!sgbuf)
return NULL;
+ sgbuf->use_dma_alloc_coherent = cpu_feature_enabled(X86_FEATURE_XENPV);
size = PAGE_ALIGN(size);
sgbuf->count = size >> PAGE_SHIFT;
sgbuf->pages = kvcalloc(sgbuf->count, sizeof(*sgbuf->pages), GFP_KERNEL);
- if (!sgbuf->pages)
+ sgbuf->addrs = kvcalloc(sgbuf->count, sizeof(*sgbuf->addrs), GFP_KERNEL);
+ if (!sgbuf->pages || !sgbuf->addrs)
goto error;
pagep = sgbuf->pages;
- chunk = size;
+ addrp = sgbuf->addrs;
+ chunk = (PAGE_SIZE - 1) << PAGE_SHIFT; /* to fit in low bits in addrs */
while (size > 0) {
chunk = min(size, chunk);
- p = do_alloc_pages(dmab->dev.dev, chunk, &addr, wc);
+ if (sgbuf->use_dma_alloc_coherent)
+ p = dma_alloc_coherent(dmab->dev.dev, chunk, &addr, DEFAULT_GFP);
+ else
+ p = do_alloc_pages(dmab->dev.dev, chunk, &addr, false);
if (!p) {
if (chunk <= PAGE_SIZE)
goto error;
@@ -767,17 +797,25 @@ static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size)
size -= chunk;
/* fill pages */
npages = chunk >> PAGE_SHIFT;
+ *addrp = npages; /* store in lower bits */
curp = virt_to_page(p);
- while (npages--)
+ while (npages--) {
*pagep++ = curp++;
+ *addrp++ |= addr;
+ addr += PAGE_SIZE;
+ }
}
p = vmap(sgbuf->pages, sgbuf->count, VM_MAP, PAGE_KERNEL);
if (!p)
goto error;
+
+ if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK)
+ set_pages_array_wc(sgbuf->pages, sgbuf->count);
+
dmab->private_data = sgbuf;
/* store the first page address for convenience */
- dmab->addr = snd_sgbuf_get_addr(dmab, 0);
+ dmab->addr = sgbuf->addrs[0] & PAGE_MASK;
return p;
error:
@@ -787,10 +825,23 @@ static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size)
static void snd_dma_sg_fallback_free(struct snd_dma_buffer *dmab)
{
+ struct snd_dma_sg_fallback *sgbuf = dmab->private_data;
+
+ if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK)
+ set_pages_array_wb(sgbuf->pages, sgbuf->count);
vunmap(dmab->area);
__snd_dma_sg_fallback_free(dmab, dmab->private_data);
}
+static dma_addr_t snd_dma_sg_fallback_get_addr(struct snd_dma_buffer *dmab,
+ size_t offset)
+{
+ struct snd_dma_sg_fallback *sgbuf = dmab->private_data;
+ size_t index = offset >> PAGE_SHIFT;
+
+ return (sgbuf->addrs[index] & PAGE_MASK) | (offset & ~PAGE_MASK);
+}
+
static int snd_dma_sg_fallback_mmap(struct snd_dma_buffer *dmab,
struct vm_area_struct *area)
{
@@ -805,8 +856,8 @@ static const struct snd_malloc_ops snd_dma_sg_fallback_ops = {
.alloc = snd_dma_sg_fallback_alloc,
.free = snd_dma_sg_fallback_free,
.mmap = snd_dma_sg_fallback_mmap,
+ .get_addr = snd_dma_sg_fallback_get_addr,
/* reuse vmalloc helpers */
- .get_addr = snd_dma_vmalloc_get_addr,
.get_page = snd_dma_vmalloc_get_page,
.get_chunk_size = snd_dma_vmalloc_get_chunk_size,
};
diff --git a/sound/firewire/motu/motu-hwdep.c b/sound/firewire/motu/motu-hwdep.c
index a900fc0e7644..88d1f4b56e4b 100644
--- a/sound/firewire/motu/motu-hwdep.c
+++ b/sound/firewire/motu/motu-hwdep.c
@@ -87,6 +87,10 @@ static long hwdep_read(struct snd_hwdep *hwdep, char __user *buf, long count,
return -EFAULT;
count = consumed;
+ } else {
+ spin_unlock_irq(&motu->lock);
+
+ count = 0;
}
return count;
diff --git a/sound/pci/hda/cs35l41_hda.c b/sound/pci/hda/cs35l41_hda.c
index 91842c0c8c74..f7815ee24f83 100644
--- a/sound/pci/hda/cs35l41_hda.c
+++ b/sound/pci/hda/cs35l41_hda.c
@@ -598,8 +598,8 @@ static int cs35l41_system_suspend(struct device *dev)
dev_dbg(cs35l41->dev, "System Suspend\n");
if (cs35l41->hw_cfg.bst_type == CS35L41_EXT_BOOST_NO_VSPK_SWITCH) {
- dev_err(cs35l41->dev, "System Suspend not supported\n");
- return -EINVAL;
+ dev_err_once(cs35l41->dev, "System Suspend not supported\n");
+ return 0; /* don't block the whole system suspend */
}
ret = pm_runtime_force_suspend(dev);
@@ -624,8 +624,8 @@ static int cs35l41_system_resume(struct device *dev)
dev_dbg(cs35l41->dev, "System Resume\n");
if (cs35l41->hw_cfg.bst_type == CS35L41_EXT_BOOST_NO_VSPK_SWITCH) {
- dev_err(cs35l41->dev, "System Resume not supported\n");
- return -EINVAL;
+ dev_err_once(cs35l41->dev, "System Resume not supported\n");
+ return 0; /* don't block the whole system resume */
}
if (cs35l41->reset_gpio) {
@@ -647,6 +647,15 @@ static int cs35l41_system_resume(struct device *dev)
return ret;
}
+static int cs35l41_runtime_idle(struct device *dev)
+{
+ struct cs35l41_hda *cs35l41 = dev_get_drvdata(dev);
+
+ if (cs35l41->hw_cfg.bst_type == CS35L41_EXT_BOOST_NO_VSPK_SWITCH)
+ return -EBUSY; /* suspend not supported yet on this model */
+ return 0;
+}
+
static int cs35l41_runtime_suspend(struct device *dev)
{
struct cs35l41_hda *cs35l41 = dev_get_drvdata(dev);
@@ -1536,7 +1545,8 @@ void cs35l41_hda_remove(struct device *dev)
EXPORT_SYMBOL_NS_GPL(cs35l41_hda_remove, SND_HDA_SCODEC_CS35L41);
const struct dev_pm_ops cs35l41_hda_pm_ops = {
- RUNTIME_PM_OPS(cs35l41_runtime_suspend, cs35l41_runtime_resume, NULL)
+ RUNTIME_PM_OPS(cs35l41_runtime_suspend, cs35l41_runtime_resume,
+ cs35l41_runtime_idle)
SYSTEM_SLEEP_PM_OPS(cs35l41_system_suspend, cs35l41_system_resume)
};
EXPORT_SYMBOL_NS_GPL(cs35l41_hda_pm_ops, SND_HDA_SCODEC_CS35L41);
diff --git a/sound/pci/hda/hda_bind.c b/sound/pci/hda/hda_bind.c
index 1a868dd9dc4b..890c2f7c33fc 100644
--- a/sound/pci/hda/hda_bind.c
+++ b/sound/pci/hda/hda_bind.c
@@ -144,6 +144,7 @@ static int hda_codec_driver_probe(struct device *dev)
error:
snd_hda_codec_cleanup_for_unbind(codec);
+ codec->preset = NULL;
return err;
}
@@ -166,6 +167,7 @@ static int hda_codec_driver_remove(struct device *dev)
if (codec->patch_ops.free)
codec->patch_ops.free(codec);
snd_hda_codec_cleanup_for_unbind(codec);
+ codec->preset = NULL;
module_put(dev->driver->owner);
return 0;
}
diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index edd653ece70d..2e728aad6771 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -795,7 +795,6 @@ void snd_hda_codec_cleanup_for_unbind(struct hda_codec *codec)
snd_array_free(&codec->cvt_setups);
snd_array_free(&codec->spdif_out);
snd_array_free(&codec->verbs);
- codec->preset = NULL;
codec->follower_dig_outs = NULL;
codec->spdif_status_reset = 0;
snd_array_free(&codec->mixers);
@@ -928,7 +927,6 @@ snd_hda_codec_device_init(struct hda_bus *bus, unsigned int codec_addr,
codec->depop_delay = -1;
codec->fixup_id = HDA_FIXUP_ID_NOT_SET;
codec->core.dev.release = snd_hda_codec_dev_release;
- codec->core.exec_verb = codec_exec_verb;
codec->core.type = HDA_DEV_LEGACY;
mutex_init(&codec->spdif_mutex);
@@ -999,6 +997,7 @@ int snd_hda_codec_device_new(struct hda_bus *bus, struct snd_card *card,
if (snd_BUG_ON(codec_addr > HDA_MAX_CODEC_ADDRESS))
return -EINVAL;
+ codec->core.exec_verb = codec_exec_verb;
codec->card = card;
codec->addr = codec_addr;
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index 7b1a30a551f6..75e1d00074b9 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -1125,6 +1125,7 @@ static const struct hda_device_id snd_hda_id_conexant[] = {
HDA_CODEC_ENTRY(0x14f11f87, "SN6140", patch_conexant_auto),
HDA_CODEC_ENTRY(0x14f12008, "CX8200", patch_conexant_auto),
HDA_CODEC_ENTRY(0x14f120d0, "CX11970", patch_conexant_auto),
+ HDA_CODEC_ENTRY(0x14f120d1, "SN6180", patch_conexant_auto),
HDA_CODEC_ENTRY(0x14f15045, "CX20549 (Venice)", patch_conexant_auto),
HDA_CODEC_ENTRY(0x14f15047, "CX20551 (Waikiki)", patch_conexant_auto),
HDA_CODEC_ENTRY(0x14f15051, "CX20561 (Hermosa)", patch_conexant_auto),
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 386dd9d9143f..9ea633fe9339 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -1981,6 +1981,7 @@ static const struct snd_pci_quirk force_connect_list[] = {
SND_PCI_QUIRK(0x103c, 0x870f, "HP", 1),
SND_PCI_QUIRK(0x103c, 0x871a, "HP", 1),
SND_PCI_QUIRK(0x103c, 0x8711, "HP", 1),
+ SND_PCI_QUIRK(0x103c, 0x8715, "HP", 1),
SND_PCI_QUIRK(0x1462, 0xec94, "MS-7C94", 1),
SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", 1),
{}
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 3794b522c222..e103bb3693c0 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -832,7 +832,7 @@ do_sku:
alc_setup_gpio(codec, 0x02);
break;
case 7:
- alc_setup_gpio(codec, 0x03);
+ alc_setup_gpio(codec, 0x04);
break;
case 5:
default:
@@ -3564,6 +3564,15 @@ static void alc256_init(struct hda_codec *codec)
hda_nid_t hp_pin = alc_get_hp_pin(spec);
bool hp_pin_sense;
+ if (spec->ultra_low_power) {
+ alc_update_coef_idx(codec, 0x03, 1<<1, 1<<1);
+ alc_update_coef_idx(codec, 0x08, 3<<2, 3<<2);
+ alc_update_coef_idx(codec, 0x08, 7<<4, 0);
+ alc_update_coef_idx(codec, 0x3b, 1<<15, 0);
+ alc_update_coef_idx(codec, 0x0e, 7<<6, 7<<6);
+ msleep(30);
+ }
+
if (!hp_pin)
hp_pin = 0x21;
@@ -3575,14 +3584,6 @@ static void alc256_init(struct hda_codec *codec)
msleep(2);
alc_update_coefex_idx(codec, 0x57, 0x04, 0x0007, 0x1); /* Low power */
- if (spec->ultra_low_power) {
- alc_update_coef_idx(codec, 0x03, 1<<1, 1<<1);
- alc_update_coef_idx(codec, 0x08, 3<<2, 3<<2);
- alc_update_coef_idx(codec, 0x08, 7<<4, 0);
- alc_update_coef_idx(codec, 0x3b, 1<<15, 0);
- alc_update_coef_idx(codec, 0x0e, 7<<6, 7<<6);
- msleep(30);
- }
snd_hda_codec_write(codec, hp_pin, 0,
AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE);
@@ -3713,6 +3714,13 @@ static void alc225_init(struct hda_codec *codec)
hda_nid_t hp_pin = alc_get_hp_pin(spec);
bool hp1_pin_sense, hp2_pin_sense;
+ if (spec->ultra_low_power) {
+ alc_update_coef_idx(codec, 0x08, 0x0f << 2, 3<<2);
+ alc_update_coef_idx(codec, 0x0e, 7<<6, 7<<6);
+ alc_update_coef_idx(codec, 0x33, 1<<11, 0);
+ msleep(30);
+ }
+
if (spec->codec_variant != ALC269_TYPE_ALC287 &&
spec->codec_variant != ALC269_TYPE_ALC245)
/* required only at boot or S3 and S4 resume time */
@@ -3734,12 +3742,6 @@ static void alc225_init(struct hda_codec *codec)
msleep(2);
alc_update_coefex_idx(codec, 0x57, 0x04, 0x0007, 0x1); /* Low power */
- if (spec->ultra_low_power) {
- alc_update_coef_idx(codec, 0x08, 0x0f << 2, 3<<2);
- alc_update_coef_idx(codec, 0x0e, 7<<6, 7<<6);
- alc_update_coef_idx(codec, 0x33, 1<<11, 0);
- msleep(30);
- }
if (hp1_pin_sense || spec->ultra_low_power)
snd_hda_codec_write(codec, hp_pin, 0,
@@ -4644,6 +4646,16 @@ static void alc285_fixup_hp_coef_micmute_led(struct hda_codec *codec,
}
}
+static void alc285_fixup_hp_gpio_micmute_led(struct hda_codec *codec,
+ const struct hda_fixup *fix, int action)
+{
+ struct alc_spec *spec = codec->spec;
+
+ if (action == HDA_FIXUP_ACT_PRE_PROBE)
+ spec->micmute_led_polarity = 1;
+ alc_fixup_hp_gpio_led(codec, action, 0, 0x04);
+}
+
static void alc236_fixup_hp_coef_micmute_led(struct hda_codec *codec,
const struct hda_fixup *fix, int action)
{
@@ -4665,6 +4677,13 @@ static void alc285_fixup_hp_mute_led(struct hda_codec *codec,
alc285_fixup_hp_coef_micmute_led(codec, fix, action);
}
+static void alc285_fixup_hp_spectre_x360_mute_led(struct hda_codec *codec,
+ const struct hda_fixup *fix, int action)
+{
+ alc285_fixup_hp_mute_led_coefbit(codec, fix, action);
+ alc285_fixup_hp_gpio_micmute_led(codec, fix, action);
+}
+
static void alc236_fixup_hp_mute_led(struct hda_codec *codec,
const struct hda_fixup *fix, int action)
{
@@ -7106,6 +7125,7 @@ enum {
ALC285_FIXUP_ASUS_G533Z_PINS,
ALC285_FIXUP_HP_GPIO_LED,
ALC285_FIXUP_HP_MUTE_LED,
+ ALC285_FIXUP_HP_SPECTRE_X360_MUTE_LED,
ALC236_FIXUP_HP_GPIO_LED,
ALC236_FIXUP_HP_MUTE_LED,
ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF,
@@ -8486,6 +8506,10 @@ static const struct hda_fixup alc269_fixups[] = {
.type = HDA_FIXUP_FUNC,
.v.func = alc285_fixup_hp_mute_led,
},
+ [ALC285_FIXUP_HP_SPECTRE_X360_MUTE_LED] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc285_fixup_hp_spectre_x360_mute_led,
+ },
[ALC236_FIXUP_HP_GPIO_LED] = {
.type = HDA_FIXUP_FUNC,
.v.func = alc236_fixup_hp_gpio_led,
@@ -9178,6 +9202,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1025, 0x142b, "Acer Swift SF314-42", ALC255_FIXUP_ACER_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1025, 0x1430, "Acer TravelMate B311R-31", ALC256_FIXUP_ACER_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1025, 0x1466, "Acer Aspire A515-56", ALC255_FIXUP_ACER_HEADPHONE_AND_MIC),
+ SND_PCI_QUIRK(0x1025, 0x1534, "Acer Predator PH315-54", ALC255_FIXUP_ACER_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z),
SND_PCI_QUIRK(0x1028, 0x053c, "Dell Latitude E5430", ALC292_FIXUP_DELL_E7X),
SND_PCI_QUIRK(0x1028, 0x054b, "Dell XPS one 2710", ALC275_FIXUP_DELL_XPS),
@@ -9239,6 +9264,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1028, 0x0b1a, "Dell Precision 5570", ALC289_FIXUP_DUAL_SPK),
SND_PCI_QUIRK(0x1028, 0x0b37, "Dell Inspiron 16 Plus 7620 2-in-1", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS),
SND_PCI_QUIRK(0x1028, 0x0b71, "Dell Inspiron 16 Plus 7620", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS),
+ SND_PCI_QUIRK(0x1028, 0x0c03, "Dell Precision 5340", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x0c19, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS),
SND_PCI_QUIRK(0x1028, 0x0c1a, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS),
SND_PCI_QUIRK(0x1028, 0x0c1b, "Dell Precision 3440", ALC236_FIXUP_DELL_DUAL_CODECS),
@@ -9327,6 +9353,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x86c7, "HP Envy AiO 32", ALC274_FIXUP_HP_ENVY_GPIO),
SND_PCI_QUIRK(0x103c, 0x86e7, "HP Spectre x360 15-eb0xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1),
SND_PCI_QUIRK(0x103c, 0x86e8, "HP Spectre x360 15-eb0xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1),
+ SND_PCI_QUIRK(0x103c, 0x86f9, "HP Spectre x360 13-aw0xxx", ALC285_FIXUP_HP_SPECTRE_X360_MUTE_LED),
SND_PCI_QUIRK(0x103c, 0x8716, "HP Elite Dragonfly G2 Notebook PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
SND_PCI_QUIRK(0x103c, 0x8720, "HP EliteBook x360 1040 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
SND_PCI_QUIRK(0x103c, 0x8724, "HP EliteBook 850 G7", ALC285_FIXUP_HP_GPIO_LED),
@@ -9396,6 +9423,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x89c3, "Zbook Studio G9", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x89c6, "Zbook Fury 17 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x89ca, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
+ SND_PCI_QUIRK(0x103c, 0x89d3, "HP EliteBook 645 G9 (MB 89D2)", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
SND_PCI_QUIRK(0x103c, 0x8a78, "HP Dev One", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST),
SND_PCI_QUIRK(0x103c, 0x8aa0, "HP ProBook 440 G9 (MB 8A9E)", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8aa3, "HP ProBook 450 G9 (MB 8AA1)", ALC236_FIXUP_HP_GPIO_LED),
@@ -9404,8 +9432,22 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8abb, "HP ZBook Firefly 14 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8ad1, "HP EliteBook 840 14 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8b42, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8b43, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8b44, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8b45, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8b46, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8b47, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8b5d, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
SND_PCI_QUIRK(0x103c, 0x8b5e, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
+ SND_PCI_QUIRK(0x103c, 0x8b7a, "HP", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8b7d, "HP", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8b87, "HP", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8b8a, "HP", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8b8b, "HP", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8b8d, "HP", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8b92, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8bf0, "HP", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
@@ -9451,6 +9493,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1043, 0x1d4e, "ASUS TM420", ALC256_FIXUP_ASUS_HPE),
SND_PCI_QUIRK(0x1043, 0x1e02, "ASUS UX3402", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA502),
+ SND_PCI_QUIRK(0x1043, 0x1e12, "ASUS UM3402", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS),
SND_PCI_QUIRK(0x1043, 0x1e5e, "ASUS ROG Strix G513", ALC294_FIXUP_ASUS_G513_PINS),
SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401),
@@ -9494,6 +9537,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x144d, 0xc812, "Samsung Notebook Pen S (NT950SBE-X58)", ALC298_FIXUP_SAMSUNG_AMP),
SND_PCI_QUIRK(0x144d, 0xc830, "Samsung Galaxy Book Ion (NT950XCJ-X716A)", ALC298_FIXUP_SAMSUNG_AMP),
SND_PCI_QUIRK(0x144d, 0xc832, "Samsung Galaxy Book Flex Alpha (NP730QCJ)", ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET),
+ SND_PCI_QUIRK(0x144d, 0xca03, "Samsung Galaxy Book2 Pro 360 (NP930QED)", ALC298_FIXUP_SAMSUNG_AMP),
SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC),
SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC),
SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC),
@@ -9672,6 +9716,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1b7d, 0xa831, "Ordissimo EVE2 ", ALC269VB_FIXUP_ORDISSIMO_EVE2), /* Also known as Malata PC-B1303 */
SND_PCI_QUIRK(0x1c06, 0x2013, "Lemote A1802", ALC269_FIXUP_LEMOTE_A1802),
SND_PCI_QUIRK(0x1c06, 0x2015, "Lemote A190X", ALC269_FIXUP_LEMOTE_A190X),
+ SND_PCI_QUIRK(0x1c6c, 0x1251, "Positivo N14KP6-TG", ALC288_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1d05, 0x1132, "TongFang PHxTxX1", ALC256_FIXUP_SET_COEF_DEFAULTS),
SND_PCI_QUIRK(0x1d05, 0x1096, "TongFang GMxMRxx", ALC269_FIXUP_NO_SHUTUP),
SND_PCI_QUIRK(0x1d05, 0x1100, "TongFang GKxNRxx", ALC269_FIXUP_NO_SHUTUP),
diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c
index aea7fae2ca4b..2994f85bc1b9 100644
--- a/sound/pci/hda/patch_via.c
+++ b/sound/pci/hda/patch_via.c
@@ -819,6 +819,9 @@ static int add_secret_dac_path(struct hda_codec *codec)
return 0;
nums = snd_hda_get_connections(codec, spec->gen.mixer_nid, conn,
ARRAY_SIZE(conn) - 1);
+ if (nums < 0)
+ return nums;
+
for (i = 0; i < nums; i++) {
if (get_wcaps_type(get_wcaps(codec, conn[i])) == AC_WID_AUD_OUT)
return 0;
diff --git a/sound/pci/lx6464es/lx_core.c b/sound/pci/lx6464es/lx_core.c
index d3f58a3d17fb..b5b0d43bb8dc 100644
--- a/sound/pci/lx6464es/lx_core.c
+++ b/sound/pci/lx6464es/lx_core.c
@@ -493,12 +493,11 @@ int lx_buffer_ask(struct lx6464es *chip, u32 pipe, int is_capture,
dev_dbg(chip->card->dev,
"CMD_08_ASK_BUFFERS: needed %d, freed %d\n",
*r_needed, *r_freed);
- for (i = 0; i < MAX_STREAM_BUFFER; ++i) {
- for (i = 0; i != chip->rmh.stat_len; ++i)
- dev_dbg(chip->card->dev,
- " stat[%d]: %x, %x\n", i,
- chip->rmh.stat[i],
- chip->rmh.stat[i] & MASK_DATA_SIZE);
+ for (i = 0; i < MAX_STREAM_BUFFER && i < chip->rmh.stat_len;
+ ++i) {
+ dev_dbg(chip->card->dev, " stat[%d]: %x, %x\n", i,
+ chip->rmh.stat[i],
+ chip->rmh.stat[i] & MASK_DATA_SIZE);
}
}
diff --git a/sound/soc/amd/acp-es8336.c b/sound/soc/amd/acp-es8336.c
index 2fe8df86053a..89499542c803 100644
--- a/sound/soc/amd/acp-es8336.c
+++ b/sound/soc/amd/acp-es8336.c
@@ -198,9 +198,11 @@ static int st_es8336_late_probe(struct snd_soc_card *card)
int ret;
adev = acpi_dev_get_first_match_dev("ESSX8336", NULL, -1);
- if (adev)
- put_device(&adev->dev);
+ if (!adev)
+ return -ENODEV;
+
codec_dev = acpi_get_first_physical_node(adev);
+ acpi_dev_put(adev);
if (!codec_dev)
dev_err(card->dev, "can not find codec dev\n");
diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c
index 1f0b5527c594..36314753923b 100644
--- a/sound/soc/amd/yc/acp6x-mach.c
+++ b/sound/soc/amd/yc/acp6x-mach.c
@@ -209,6 +209,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
{
.driver_data = &acp6x_card,
.matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "M5402RA"),
+ }
+ },
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "Alienware"),
DMI_MATCH(DMI_PRODUCT_NAME, "Alienware m17 R5 AMD"),
}
@@ -220,6 +227,34 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "Redmi Book Pro 14 2022"),
}
},
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "TIMI"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Redmi Book Pro 15 2022"),
+ }
+ },
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Razer"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Blade 14 (2022) - RZ09-0427"),
+ }
+ },
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "RB"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Swift SFA16-41"),
+ }
+ },
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "IRBIS"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "15NBC1011"),
+ }
+ },
{}
};
diff --git a/sound/soc/codecs/cs42l56.c b/sound/soc/codecs/cs42l56.c
index 26066682c983..3b0e715549c9 100644
--- a/sound/soc/codecs/cs42l56.c
+++ b/sound/soc/codecs/cs42l56.c
@@ -1191,18 +1191,12 @@ static int cs42l56_i2c_probe(struct i2c_client *i2c_client)
if (pdata) {
cs42l56->pdata = *pdata;
} else {
- pdata = devm_kzalloc(&i2c_client->dev, sizeof(*pdata),
- GFP_KERNEL);
- if (!pdata)
- return -ENOMEM;
-
if (i2c_client->dev.of_node) {
ret = cs42l56_handle_of_data(i2c_client,
&cs42l56->pdata);
if (ret != 0)
return ret;
}
- cs42l56->pdata = *pdata;
}
if (cs42l56->pdata.gpio_nreset) {
diff --git a/sound/soc/codecs/es8326.c b/sound/soc/codecs/es8326.c
index 9ddf6a35e91c..28a0565c2a95 100755..100644
--- a/sound/soc/codecs/es8326.c
+++ b/sound/soc/codecs/es8326.c
@@ -729,14 +729,16 @@ static int es8326_probe(struct snd_soc_component *component)
}
dev_dbg(component->dev, "jack-pol %x", es8326->jack_pol);
- ret = device_property_read_u8(component->dev, "everest,interrupt-src", &es8326->jack_pol);
+ ret = device_property_read_u8(component->dev, "everest,interrupt-src",
+ &es8326->interrupt_src);
if (ret != 0) {
dev_dbg(component->dev, "interrupt-src return %d", ret);
es8326->interrupt_src = ES8326_HP_DET_SRC_PIN9;
}
dev_dbg(component->dev, "interrupt-src %x", es8326->interrupt_src);
- ret = device_property_read_u8(component->dev, "everest,interrupt-clk", &es8326->jack_pol);
+ ret = device_property_read_u8(component->dev, "everest,interrupt-clk",
+ &es8326->interrupt_clk);
if (ret != 0) {
dev_dbg(component->dev, "interrupt-clk return %d", ret);
es8326->interrupt_clk = 0x45;
diff --git a/sound/soc/codecs/es8326.h b/sound/soc/codecs/es8326.h
index 8e5ffe5ee10d..8e5ffe5ee10d 100755..100644
--- a/sound/soc/codecs/es8326.h
+++ b/sound/soc/codecs/es8326.h
diff --git a/sound/soc/codecs/rt715-sdca-sdw.c b/sound/soc/codecs/rt715-sdca-sdw.c
index 3f981a9e7fb6..c54ecf3e6987 100644
--- a/sound/soc/codecs/rt715-sdca-sdw.c
+++ b/sound/soc/codecs/rt715-sdca-sdw.c
@@ -167,7 +167,7 @@ static int rt715_sdca_read_prop(struct sdw_slave *slave)
}
/* set the timeout values */
- prop->clk_stop_timeout = 20;
+ prop->clk_stop_timeout = 200;
return 0;
}
diff --git a/sound/soc/codecs/rt9120.c b/sound/soc/codecs/rt9120.c
index 644300e88b4c..fcf4fbaed3c7 100644
--- a/sound/soc/codecs/rt9120.c
+++ b/sound/soc/codecs/rt9120.c
@@ -177,8 +177,20 @@ static int rt9120_codec_probe(struct snd_soc_component *comp)
return 0;
}
+static int rt9120_codec_suspend(struct snd_soc_component *comp)
+{
+ return pm_runtime_force_suspend(comp->dev);
+}
+
+static int rt9120_codec_resume(struct snd_soc_component *comp)
+{
+ return pm_runtime_force_resume(comp->dev);
+}
+
static const struct snd_soc_component_driver rt9120_component_driver = {
.probe = rt9120_codec_probe,
+ .suspend = rt9120_codec_suspend,
+ .resume = rt9120_codec_resume,
.controls = rt9120_snd_controls,
.num_controls = ARRAY_SIZE(rt9120_snd_controls),
.dapm_widgets = rt9120_dapm_widgets,
diff --git a/sound/soc/codecs/tas5805m.c b/sound/soc/codecs/tas5805m.c
index beb4ec629a03..4e38eb7acea1 100644
--- a/sound/soc/codecs/tas5805m.c
+++ b/sound/soc/codecs/tas5805m.c
@@ -154,6 +154,7 @@ static const uint32_t tas5805m_volume[] = {
#define TAS5805M_VOLUME_MIN 0
struct tas5805m_priv {
+ struct i2c_client *i2c;
struct regulator *pvdd;
struct gpio_desc *gpio_pdn_n;
@@ -165,6 +166,9 @@ struct tas5805m_priv {
int vol[2];
bool is_powered;
bool is_muted;
+
+ struct work_struct work;
+ struct mutex lock;
};
static void set_dsp_scale(struct regmap *rm, int offset, int vol)
@@ -181,13 +185,11 @@ static void set_dsp_scale(struct regmap *rm, int offset, int vol)
regmap_bulk_write(rm, offset, v, ARRAY_SIZE(v));
}
-static void tas5805m_refresh(struct snd_soc_component *component)
+static void tas5805m_refresh(struct tas5805m_priv *tas5805m)
{
- struct tas5805m_priv *tas5805m =
- snd_soc_component_get_drvdata(component);
struct regmap *rm = tas5805m->regmap;
- dev_dbg(component->dev, "refresh: is_muted=%d, vol=%d/%d\n",
+ dev_dbg(&tas5805m->i2c->dev, "refresh: is_muted=%d, vol=%d/%d\n",
tas5805m->is_muted, tas5805m->vol[0], tas5805m->vol[1]);
regmap_write(rm, REG_PAGE, 0x00);
@@ -201,6 +203,9 @@ static void tas5805m_refresh(struct snd_soc_component *component)
set_dsp_scale(rm, 0x24, tas5805m->vol[0]);
set_dsp_scale(rm, 0x28, tas5805m->vol[1]);
+ regmap_write(rm, REG_PAGE, 0x00);
+ regmap_write(rm, REG_BOOK, 0x00);
+
/* Set/clear digital soft-mute */
regmap_write(rm, REG_DEVICE_CTRL_2,
(tas5805m->is_muted ? DCTRL2_MUTE : 0) |
@@ -226,8 +231,11 @@ static int tas5805m_vol_get(struct snd_kcontrol *kcontrol,
struct tas5805m_priv *tas5805m =
snd_soc_component_get_drvdata(component);
+ mutex_lock(&tas5805m->lock);
ucontrol->value.integer.value[0] = tas5805m->vol[0];
ucontrol->value.integer.value[1] = tas5805m->vol[1];
+ mutex_unlock(&tas5805m->lock);
+
return 0;
}
@@ -243,11 +251,13 @@ static int tas5805m_vol_put(struct snd_kcontrol *kcontrol,
snd_soc_kcontrol_component(kcontrol);
struct tas5805m_priv *tas5805m =
snd_soc_component_get_drvdata(component);
+ int ret = 0;
if (!(volume_is_valid(ucontrol->value.integer.value[0]) &&
volume_is_valid(ucontrol->value.integer.value[1])))
return -EINVAL;
+ mutex_lock(&tas5805m->lock);
if (tas5805m->vol[0] != ucontrol->value.integer.value[0] ||
tas5805m->vol[1] != ucontrol->value.integer.value[1]) {
tas5805m->vol[0] = ucontrol->value.integer.value[0];
@@ -256,11 +266,12 @@ static int tas5805m_vol_put(struct snd_kcontrol *kcontrol,
tas5805m->vol[0], tas5805m->vol[1],
tas5805m->is_powered);
if (tas5805m->is_powered)
- tas5805m_refresh(component);
- return 1;
+ tas5805m_refresh(tas5805m);
+ ret = 1;
}
+ mutex_unlock(&tas5805m->lock);
- return 0;
+ return ret;
}
static const struct snd_kcontrol_new tas5805m_snd_controls[] = {
@@ -294,54 +305,83 @@ static int tas5805m_trigger(struct snd_pcm_substream *substream, int cmd,
struct snd_soc_component *component = dai->component;
struct tas5805m_priv *tas5805m =
snd_soc_component_get_drvdata(component);
- struct regmap *rm = tas5805m->regmap;
- unsigned int chan, global1, global2;
switch (cmd) {
case SNDRV_PCM_TRIGGER_START:
case SNDRV_PCM_TRIGGER_RESUME:
case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
- dev_dbg(component->dev, "DSP startup\n");
-
- /* We mustn't issue any I2C transactions until the I2S
- * clock is stable. Furthermore, we must allow a 5ms
- * delay after the first set of register writes to
- * allow the DSP to boot before configuring it.
- */
- usleep_range(5000, 10000);
- send_cfg(rm, dsp_cfg_preboot,
- ARRAY_SIZE(dsp_cfg_preboot));
- usleep_range(5000, 15000);
- send_cfg(rm, tas5805m->dsp_cfg_data,
- tas5805m->dsp_cfg_len);
-
- tas5805m->is_powered = true;
- tas5805m_refresh(component);
+ dev_dbg(component->dev, "clock start\n");
+ schedule_work(&tas5805m->work);
break;
case SNDRV_PCM_TRIGGER_STOP:
case SNDRV_PCM_TRIGGER_SUSPEND:
case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
- dev_dbg(component->dev, "DSP shutdown\n");
+ break;
- tas5805m->is_powered = false;
+ default:
+ return -EINVAL;
+ }
- regmap_write(rm, REG_PAGE, 0x00);
- regmap_write(rm, REG_BOOK, 0x00);
+ return 0;
+}
- regmap_read(rm, REG_CHAN_FAULT, &chan);
- regmap_read(rm, REG_GLOBAL_FAULT1, &global1);
- regmap_read(rm, REG_GLOBAL_FAULT2, &global2);
+static void do_work(struct work_struct *work)
+{
+ struct tas5805m_priv *tas5805m =
+ container_of(work, struct tas5805m_priv, work);
+ struct regmap *rm = tas5805m->regmap;
- dev_dbg(component->dev,
- "fault regs: CHAN=%02x, GLOBAL1=%02x, GLOBAL2=%02x\n",
- chan, global1, global2);
+ dev_dbg(&tas5805m->i2c->dev, "DSP startup\n");
- regmap_write(rm, REG_DEVICE_CTRL_2, DCTRL2_MODE_HIZ);
- break;
+ mutex_lock(&tas5805m->lock);
+ /* We mustn't issue any I2C transactions until the I2S
+ * clock is stable. Furthermore, we must allow a 5ms
+ * delay after the first set of register writes to
+ * allow the DSP to boot before configuring it.
+ */
+ usleep_range(5000, 10000);
+ send_cfg(rm, dsp_cfg_preboot, ARRAY_SIZE(dsp_cfg_preboot));
+ usleep_range(5000, 15000);
+ send_cfg(rm, tas5805m->dsp_cfg_data, tas5805m->dsp_cfg_len);
+
+ tas5805m->is_powered = true;
+ tas5805m_refresh(tas5805m);
+ mutex_unlock(&tas5805m->lock);
+}
- default:
- return -EINVAL;
+static int tas5805m_dac_event(struct snd_soc_dapm_widget *w,
+ struct snd_kcontrol *kcontrol, int event)
+{
+ struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm);
+ struct tas5805m_priv *tas5805m =
+ snd_soc_component_get_drvdata(component);
+ struct regmap *rm = tas5805m->regmap;
+
+ if (event & SND_SOC_DAPM_PRE_PMD) {
+ unsigned int chan, global1, global2;
+
+ dev_dbg(component->dev, "DSP shutdown\n");
+ cancel_work_sync(&tas5805m->work);
+
+ mutex_lock(&tas5805m->lock);
+ if (tas5805m->is_powered) {
+ tas5805m->is_powered = false;
+
+ regmap_write(rm, REG_PAGE, 0x00);
+ regmap_write(rm, REG_BOOK, 0x00);
+
+ regmap_read(rm, REG_CHAN_FAULT, &chan);
+ regmap_read(rm, REG_GLOBAL_FAULT1, &global1);
+ regmap_read(rm, REG_GLOBAL_FAULT2, &global2);
+
+ dev_dbg(component->dev, "fault regs: CHAN=%02x, "
+ "GLOBAL1=%02x, GLOBAL2=%02x\n",
+ chan, global1, global2);
+
+ regmap_write(rm, REG_DEVICE_CTRL_2, DCTRL2_MODE_HIZ);
+ }
+ mutex_unlock(&tas5805m->lock);
}
return 0;
@@ -354,7 +394,8 @@ static const struct snd_soc_dapm_route tas5805m_audio_map[] = {
static const struct snd_soc_dapm_widget tas5805m_dapm_widgets[] = {
SND_SOC_DAPM_AIF_IN("DAC IN", "Playback", 0, SND_SOC_NOPM, 0, 0),
- SND_SOC_DAPM_DAC("DAC", NULL, SND_SOC_NOPM, 0, 0),
+ SND_SOC_DAPM_DAC_E("DAC", NULL, SND_SOC_NOPM, 0, 0,
+ tas5805m_dac_event, SND_SOC_DAPM_PRE_PMD),
SND_SOC_DAPM_OUTPUT("OUT")
};
@@ -375,11 +416,14 @@ static int tas5805m_mute(struct snd_soc_dai *dai, int mute, int direction)
struct tas5805m_priv *tas5805m =
snd_soc_component_get_drvdata(component);
+ mutex_lock(&tas5805m->lock);
dev_dbg(component->dev, "set mute=%d (is_powered=%d)\n",
mute, tas5805m->is_powered);
+
tas5805m->is_muted = mute;
if (tas5805m->is_powered)
- tas5805m_refresh(component);
+ tas5805m_refresh(tas5805m);
+ mutex_unlock(&tas5805m->lock);
return 0;
}
@@ -434,6 +478,7 @@ static int tas5805m_i2c_probe(struct i2c_client *i2c)
if (!tas5805m)
return -ENOMEM;
+ tas5805m->i2c = i2c;
tas5805m->pvdd = devm_regulator_get(dev, "pvdd");
if (IS_ERR(tas5805m->pvdd)) {
dev_err(dev, "failed to get pvdd supply: %ld\n",
@@ -507,6 +552,9 @@ static int tas5805m_i2c_probe(struct i2c_client *i2c)
gpiod_set_value(tas5805m->gpio_pdn_n, 1);
usleep_range(10000, 15000);
+ INIT_WORK(&tas5805m->work, do_work);
+ mutex_init(&tas5805m->lock);
+
/* Don't register through devm. We need to be able to unregister
* the component prior to deasserting PDN#
*/
@@ -527,6 +575,7 @@ static void tas5805m_i2c_remove(struct i2c_client *i2c)
struct device *dev = &i2c->dev;
struct tas5805m_priv *tas5805m = dev_get_drvdata(dev);
+ cancel_work_sync(&tas5805m->work);
snd_soc_unregister_component(dev);
gpiod_set_value(tas5805m->gpio_pdn_n, 0);
usleep_range(10000, 15000);
diff --git a/sound/soc/codecs/wm8904.c b/sound/soc/codecs/wm8904.c
index ca6a01a230af..791d8738d1c0 100644
--- a/sound/soc/codecs/wm8904.c
+++ b/sound/soc/codecs/wm8904.c
@@ -697,6 +697,7 @@ static int out_pga_event(struct snd_soc_dapm_widget *w,
int dcs_mask;
int dcs_l, dcs_r;
int dcs_l_reg, dcs_r_reg;
+ int an_out_reg;
int timeout;
int pwr_reg;
@@ -712,6 +713,7 @@ static int out_pga_event(struct snd_soc_dapm_widget *w,
dcs_mask = WM8904_DCS_ENA_CHAN_0 | WM8904_DCS_ENA_CHAN_1;
dcs_r_reg = WM8904_DC_SERVO_8;
dcs_l_reg = WM8904_DC_SERVO_9;
+ an_out_reg = WM8904_ANALOGUE_OUT1_LEFT;
dcs_l = 0;
dcs_r = 1;
break;
@@ -720,6 +722,7 @@ static int out_pga_event(struct snd_soc_dapm_widget *w,
dcs_mask = WM8904_DCS_ENA_CHAN_2 | WM8904_DCS_ENA_CHAN_3;
dcs_r_reg = WM8904_DC_SERVO_6;
dcs_l_reg = WM8904_DC_SERVO_7;
+ an_out_reg = WM8904_ANALOGUE_OUT2_LEFT;
dcs_l = 2;
dcs_r = 3;
break;
@@ -792,6 +795,10 @@ static int out_pga_event(struct snd_soc_dapm_widget *w,
snd_soc_component_update_bits(component, reg,
WM8904_HPL_ENA_OUTP | WM8904_HPR_ENA_OUTP,
WM8904_HPL_ENA_OUTP | WM8904_HPR_ENA_OUTP);
+
+ /* Update volume, requires PGA to be powered */
+ val = snd_soc_component_read(component, an_out_reg);
+ snd_soc_component_write(component, an_out_reg, val);
break;
case SND_SOC_DAPM_POST_PMU:
diff --git a/sound/soc/codecs/wsa883x.c b/sound/soc/codecs/wsa883x.c
index 966ba4909204..58fdb4e9fd97 100644
--- a/sound/soc/codecs/wsa883x.c
+++ b/sound/soc/codecs/wsa883x.c
@@ -1359,8 +1359,8 @@ static struct snd_soc_dai_driver wsa883x_dais[] = {
.stream_name = "SPKR Playback",
.rates = WSA883X_RATES | WSA883X_FRAC_RATES,
.formats = WSA883X_FORMATS,
- .rate_max = 8000,
- .rate_min = 352800,
+ .rate_min = 8000,
+ .rate_max = 352800,
.channels_min = 1,
.channels_max = 1,
},
diff --git a/sound/soc/fsl/fsl-asoc-card.c b/sound/soc/fsl/fsl-asoc-card.c
index c836848ef0a6..8d14b5593658 100644
--- a/sound/soc/fsl/fsl-asoc-card.c
+++ b/sound/soc/fsl/fsl-asoc-card.c
@@ -121,11 +121,11 @@ static const struct snd_soc_dapm_route audio_map[] = {
static const struct snd_soc_dapm_route audio_map_ac97[] = {
/* 1st half -- Normal DAPM routes */
- {"Playback", NULL, "AC97 Playback"},
- {"AC97 Capture", NULL, "Capture"},
+ {"AC97 Playback", NULL, "CPU AC97 Playback"},
+ {"CPU AC97 Capture", NULL, "AC97 Capture"},
/* 2nd half -- ASRC DAPM routes */
- {"AC97 Playback", NULL, "ASRC-Playback"},
- {"ASRC-Capture", NULL, "AC97 Capture"},
+ {"CPU AC97 Playback", NULL, "ASRC-Playback"},
+ {"ASRC-Capture", NULL, "CPU AC97 Capture"},
};
static const struct snd_soc_dapm_route audio_map_tx[] = {
diff --git a/sound/soc/fsl/fsl_micfil.c b/sound/soc/fsl/fsl_micfil.c
index 7b17f152bbf3..94341e4352b3 100644
--- a/sound/soc/fsl/fsl_micfil.c
+++ b/sound/soc/fsl/fsl_micfil.c
@@ -315,21 +315,21 @@ static int hwvad_detected(struct snd_kcontrol *kcontrol,
static const struct snd_kcontrol_new fsl_micfil_snd_controls[] = {
SOC_SINGLE_SX_TLV("CH0 Volume", REG_MICFIL_OUT_CTRL,
- MICFIL_OUTGAIN_CHX_SHIFT(0), 0xF, 0x7, gain_tlv),
+ MICFIL_OUTGAIN_CHX_SHIFT(0), 0x8, 0xF, gain_tlv),
SOC_SINGLE_SX_TLV("CH1 Volume", REG_MICFIL_OUT_CTRL,
- MICFIL_OUTGAIN_CHX_SHIFT(1), 0xF, 0x7, gain_tlv),
+ MICFIL_OUTGAIN_CHX_SHIFT(1), 0x8, 0xF, gain_tlv),
SOC_SINGLE_SX_TLV("CH2 Volume", REG_MICFIL_OUT_CTRL,
- MICFIL_OUTGAIN_CHX_SHIFT(2), 0xF, 0x7, gain_tlv),
+ MICFIL_OUTGAIN_CHX_SHIFT(2), 0x8, 0xF, gain_tlv),
SOC_SINGLE_SX_TLV("CH3 Volume", REG_MICFIL_OUT_CTRL,
- MICFIL_OUTGAIN_CHX_SHIFT(3), 0xF, 0x7, gain_tlv),
+ MICFIL_OUTGAIN_CHX_SHIFT(3), 0x8, 0xF, gain_tlv),
SOC_SINGLE_SX_TLV("CH4 Volume", REG_MICFIL_OUT_CTRL,
- MICFIL_OUTGAIN_CHX_SHIFT(4), 0xF, 0x7, gain_tlv),
+ MICFIL_OUTGAIN_CHX_SHIFT(4), 0x8, 0xF, gain_tlv),
SOC_SINGLE_SX_TLV("CH5 Volume", REG_MICFIL_OUT_CTRL,
- MICFIL_OUTGAIN_CHX_SHIFT(5), 0xF, 0x7, gain_tlv),
+ MICFIL_OUTGAIN_CHX_SHIFT(5), 0x8, 0xF, gain_tlv),
SOC_SINGLE_SX_TLV("CH6 Volume", REG_MICFIL_OUT_CTRL,
- MICFIL_OUTGAIN_CHX_SHIFT(6), 0xF, 0x7, gain_tlv),
+ MICFIL_OUTGAIN_CHX_SHIFT(6), 0x8, 0xF, gain_tlv),
SOC_SINGLE_SX_TLV("CH7 Volume", REG_MICFIL_OUT_CTRL,
- MICFIL_OUTGAIN_CHX_SHIFT(7), 0xF, 0x7, gain_tlv),
+ MICFIL_OUTGAIN_CHX_SHIFT(7), 0x8, 0xF, gain_tlv),
SOC_ENUM_EXT("MICFIL Quality Select",
fsl_micfil_quality_enum,
micfil_quality_get, micfil_quality_set),
diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c
index 1c9be8a5dcb1..35a52c3a020d 100644
--- a/sound/soc/fsl/fsl_sai.c
+++ b/sound/soc/fsl/fsl_sai.c
@@ -1141,6 +1141,7 @@ static int fsl_sai_check_version(struct device *dev)
sai->verid.version = val &
(FSL_SAI_VERID_MAJOR_MASK | FSL_SAI_VERID_MINOR_MASK);
+ sai->verid.version >>= FSL_SAI_VERID_MINOR_SHIFT;
sai->verid.feature = val & FSL_SAI_VERID_FEATURE_MASK;
ret = regmap_read(sai->regmap, FSL_SAI_PARAM, &val);
diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c
index c9e0e31d5b34..46a53551b955 100644
--- a/sound/soc/fsl/fsl_ssi.c
+++ b/sound/soc/fsl/fsl_ssi.c
@@ -1189,14 +1189,14 @@ static struct snd_soc_dai_driver fsl_ssi_ac97_dai = {
.symmetric_channels = 1,
.probe = fsl_ssi_dai_probe,
.playback = {
- .stream_name = "AC97 Playback",
+ .stream_name = "CPU AC97 Playback",
.channels_min = 2,
.channels_max = 2,
.rates = SNDRV_PCM_RATE_8000_48000,
.formats = SNDRV_PCM_FMTBIT_S16 | SNDRV_PCM_FMTBIT_S20,
},
.capture = {
- .stream_name = "AC97 Capture",
+ .stream_name = "CPU AC97 Capture",
.channels_min = 2,
.channels_max = 2,
.rates = SNDRV_PCM_RATE_48000,
diff --git a/sound/soc/intel/avs/core.c b/sound/soc/intel/avs/core.c
index 2ca24273c491..637501850728 100644
--- a/sound/soc/intel/avs/core.c
+++ b/sound/soc/intel/avs/core.c
@@ -481,6 +481,29 @@ err_remap_bar0:
return ret;
}
+static void avs_pci_shutdown(struct pci_dev *pci)
+{
+ struct hdac_bus *bus = pci_get_drvdata(pci);
+ struct avs_dev *adev = hdac_to_avs(bus);
+
+ cancel_work_sync(&adev->probe_work);
+ avs_ipc_block(adev->ipc);
+
+ snd_hdac_stop_streams(bus);
+ avs_dsp_op(adev, int_control, false);
+ snd_hdac_ext_bus_ppcap_int_enable(bus, false);
+ snd_hdac_ext_bus_link_power_down_all(bus);
+
+ snd_hdac_bus_stop_chip(bus);
+ snd_hdac_display_power(bus, HDA_CODEC_IDX_CONTROLLER, false);
+
+ if (avs_platattr_test(adev, CLDMA))
+ pci_free_irq(pci, 0, &code_loader);
+ pci_free_irq(pci, 0, adev);
+ pci_free_irq(pci, 0, bus);
+ pci_free_irq_vectors(pci);
+}
+
static void avs_pci_remove(struct pci_dev *pci)
{
struct hdac_device *hdev, *save;
@@ -739,6 +762,7 @@ static struct pci_driver avs_pci_driver = {
.id_table = avs_ids,
.probe = avs_pci_probe,
.remove = avs_pci_remove,
+ .shutdown = avs_pci_shutdown,
.driver = {
.pm = &avs_dev_pm,
},
diff --git a/sound/soc/intel/boards/Kconfig b/sound/soc/intel/boards/Kconfig
index a472de1909f4..99308ed85277 100644
--- a/sound/soc/intel/boards/Kconfig
+++ b/sound/soc/intel/boards/Kconfig
@@ -554,10 +554,12 @@ config SND_SOC_INTEL_SOF_NAU8825_MACH
select SND_SOC_RT1015P
select SND_SOC_MAX98373_I2C
select SND_SOC_MAX98357A
+ select SND_SOC_NAU8315
select SND_SOC_DMIC
select SND_SOC_HDAC_HDMI
select SND_SOC_INTEL_HDA_DSP_COMMON
select SND_SOC_INTEL_SOF_MAXIM_COMMON
+ select SND_SOC_INTEL_SOF_REALTEK_COMMON
help
This adds support for ASoC machine driver for SOF platforms
with nau8825 codec.
diff --git a/sound/soc/intel/boards/bytcht_es8316.c b/sound/soc/intel/boards/bytcht_es8316.c
index 09d1f0f6d686..df157b01df8b 100644
--- a/sound/soc/intel/boards/bytcht_es8316.c
+++ b/sound/soc/intel/boards/bytcht_es8316.c
@@ -497,21 +497,28 @@ static int snd_byt_cht_es8316_mc_probe(struct platform_device *pdev)
if (adev) {
snprintf(codec_name, sizeof(codec_name),
"i2c-%s", acpi_dev_name(adev));
- put_device(&adev->dev);
byt_cht_es8316_dais[dai_index].codecs->name = codec_name;
} else {
dev_err(dev, "Error cannot find '%s' dev\n", mach->id);
return -ENXIO;
}
+ codec_dev = acpi_get_first_physical_node(adev);
+ acpi_dev_put(adev);
+ if (!codec_dev)
+ return -EPROBE_DEFER;
+ priv->codec_dev = get_device(codec_dev);
+
/* override platform name, if required */
byt_cht_es8316_card.dev = dev;
platform_name = mach->mach_params.platform;
ret = snd_soc_fixup_dai_links_platform_name(&byt_cht_es8316_card,
platform_name);
- if (ret)
+ if (ret) {
+ put_device(codec_dev);
return ret;
+ }
/* Check for BYTCR or other platform and setup quirks */
dmi_id = dmi_first_match(byt_cht_es8316_quirk_table);
@@ -539,13 +546,10 @@ static int snd_byt_cht_es8316_mc_probe(struct platform_device *pdev)
/* get the clock */
priv->mclk = devm_clk_get(dev, "pmc_plt_clk_3");
- if (IS_ERR(priv->mclk))
+ if (IS_ERR(priv->mclk)) {
+ put_device(codec_dev);
return dev_err_probe(dev, PTR_ERR(priv->mclk), "clk_get pmc_plt_clk_3 failed\n");
-
- codec_dev = acpi_get_first_physical_node(adev);
- if (!codec_dev)
- return -EPROBE_DEFER;
- priv->codec_dev = get_device(codec_dev);
+ }
if (quirk & BYT_CHT_ES8316_JD_INVERTED)
props[cnt++] = PROPERTY_ENTRY_BOOL("everest,jack-detect-inverted");
diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c
index 4699ca79f3ea..79e0039c79a3 100644
--- a/sound/soc/intel/boards/bytcr_rt5640.c
+++ b/sound/soc/intel/boards/bytcr_rt5640.c
@@ -1636,13 +1636,18 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev)
if (adev) {
snprintf(byt_rt5640_codec_name, sizeof(byt_rt5640_codec_name),
"i2c-%s", acpi_dev_name(adev));
- put_device(&adev->dev);
byt_rt5640_dais[dai_index].codecs->name = byt_rt5640_codec_name;
} else {
dev_err(dev, "Error cannot find '%s' dev\n", mach->id);
return -ENXIO;
}
+ codec_dev = acpi_get_first_physical_node(adev);
+ acpi_dev_put(adev);
+ if (!codec_dev)
+ return -EPROBE_DEFER;
+ priv->codec_dev = get_device(codec_dev);
+
/*
* swap SSP0 if bytcr is detected
* (will be overridden if DMI quirk is detected)
@@ -1717,11 +1722,6 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev)
byt_rt5640_quirk = quirk_override;
}
- codec_dev = acpi_get_first_physical_node(adev);
- if (!codec_dev)
- return -EPROBE_DEFER;
- priv->codec_dev = get_device(codec_dev);
-
if (byt_rt5640_quirk & BYT_RT5640_JD_HP_ELITEP_1000G2) {
acpi_dev_add_driver_gpios(ACPI_COMPANION(priv->codec_dev),
byt_rt5640_hp_elitepad_1000g2_gpios);
diff --git a/sound/soc/intel/boards/bytcr_rt5651.c b/sound/soc/intel/boards/bytcr_rt5651.c
index 81ac6eeda2e6..8fca9b82d4d0 100644
--- a/sound/soc/intel/boards/bytcr_rt5651.c
+++ b/sound/soc/intel/boards/bytcr_rt5651.c
@@ -922,7 +922,6 @@ static int snd_byt_rt5651_mc_probe(struct platform_device *pdev)
if (adev) {
snprintf(byt_rt5651_codec_name, sizeof(byt_rt5651_codec_name),
"i2c-%s", acpi_dev_name(adev));
- put_device(&adev->dev);
byt_rt5651_dais[dai_index].codecs->name = byt_rt5651_codec_name;
} else {
dev_err(dev, "Error cannot find '%s' dev\n", mach->id);
@@ -930,6 +929,7 @@ static int snd_byt_rt5651_mc_probe(struct platform_device *pdev)
}
codec_dev = acpi_get_first_physical_node(adev);
+ acpi_dev_put(adev);
if (!codec_dev)
return -EPROBE_DEFER;
priv->codec_dev = get_device(codec_dev);
diff --git a/sound/soc/intel/boards/bytcr_wm5102.c b/sound/soc/intel/boards/bytcr_wm5102.c
index 1669eb3bd80f..c0706537f673 100644
--- a/sound/soc/intel/boards/bytcr_wm5102.c
+++ b/sound/soc/intel/boards/bytcr_wm5102.c
@@ -411,9 +411,9 @@ static int snd_byt_wm5102_mc_probe(struct platform_device *pdev)
return -ENOENT;
}
snprintf(codec_name, sizeof(codec_name), "spi-%s", acpi_dev_name(adev));
- put_device(&adev->dev);
codec_dev = bus_find_device_by_name(&spi_bus_type, NULL, codec_name);
+ acpi_dev_put(adev);
if (!codec_dev)
return -EPROBE_DEFER;
diff --git a/sound/soc/intel/boards/sof_cs42l42.c b/sound/soc/intel/boards/sof_cs42l42.c
index e38bd2831e6a..e9d190cb13b0 100644
--- a/sound/soc/intel/boards/sof_cs42l42.c
+++ b/sound/soc/intel/boards/sof_cs42l42.c
@@ -336,6 +336,9 @@ static int create_spk_amp_dai_links(struct device *dev,
links[*id].platforms = platform_component;
links[*id].num_platforms = ARRAY_SIZE(platform_component);
links[*id].dpcm_playback = 1;
+ /* firmware-generated echo reference */
+ links[*id].dpcm_capture = 1;
+
links[*id].no_pcm = 1;
links[*id].cpus = &cpus[*id];
links[*id].num_cpus = 1;
diff --git a/sound/soc/intel/boards/sof_es8336.c b/sound/soc/intel/boards/sof_es8336.c
index 773e5d1d87d4..894b6610b9e2 100644
--- a/sound/soc/intel/boards/sof_es8336.c
+++ b/sound/soc/intel/boards/sof_es8336.c
@@ -681,7 +681,6 @@ static int sof_es8336_probe(struct platform_device *pdev)
if (adev) {
snprintf(codec_name, sizeof(codec_name),
"i2c-%s", acpi_dev_name(adev));
- put_device(&adev->dev);
dai_links[0].codecs->name = codec_name;
/* also fixup codec dai name if relevant */
@@ -692,16 +691,19 @@ static int sof_es8336_probe(struct platform_device *pdev)
return -ENXIO;
}
- ret = snd_soc_fixup_dai_links_platform_name(&sof_es8336_card,
- mach->mach_params.platform);
- if (ret)
- return ret;
-
codec_dev = acpi_get_first_physical_node(adev);
+ acpi_dev_put(adev);
if (!codec_dev)
return -EPROBE_DEFER;
priv->codec_dev = get_device(codec_dev);
+ ret = snd_soc_fixup_dai_links_platform_name(&sof_es8336_card,
+ mach->mach_params.platform);
+ if (ret) {
+ put_device(codec_dev);
+ return ret;
+ }
+
if (quirk & SOF_ES8336_JD_INVERTED)
props[cnt++] = PROPERTY_ENTRY_BOOL("everest,jack-detect-inverted");
diff --git a/sound/soc/intel/boards/sof_nau8825.c b/sound/soc/intel/boards/sof_nau8825.c
index 27880224359d..6794a0249a9a 100644
--- a/sound/soc/intel/boards/sof_nau8825.c
+++ b/sound/soc/intel/boards/sof_nau8825.c
@@ -48,6 +48,7 @@
#define SOF_MAX98373_SPEAKER_AMP_PRESENT BIT(15)
#define SOF_MAX98360A_SPEAKER_AMP_PRESENT BIT(16)
#define SOF_RT1015P_SPEAKER_AMP_PRESENT BIT(17)
+#define SOF_NAU8318_SPEAKER_AMP_PRESENT BIT(18)
static unsigned long sof_nau8825_quirk = SOF_NAU8825_SSP_CODEC(0);
@@ -338,6 +339,13 @@ static struct snd_soc_dai_link_component rt1019p_component[] = {
}
};
+static struct snd_soc_dai_link_component nau8318_components[] = {
+ {
+ .name = "NVTN2012:00",
+ .dai_name = "nau8315-hifi",
+ }
+};
+
static struct snd_soc_dai_link_component dummy_component[] = {
{
.name = "snd-soc-dummy",
@@ -479,13 +487,16 @@ static struct snd_soc_dai_link *sof_card_dai_links_create(struct device *dev,
links[id].num_codecs = ARRAY_SIZE(max_98373_components);
links[id].init = max_98373_spk_codec_init;
links[id].ops = &max_98373_ops;
- /* feedback stream */
- links[id].dpcm_capture = 1;
} else if (sof_nau8825_quirk &
SOF_MAX98360A_SPEAKER_AMP_PRESENT) {
max_98360a_dai_link(&links[id]);
} else if (sof_nau8825_quirk & SOF_RT1015P_SPEAKER_AMP_PRESENT) {
sof_rt1015p_dai_link(&links[id]);
+ } else if (sof_nau8825_quirk &
+ SOF_NAU8318_SPEAKER_AMP_PRESENT) {
+ links[id].codecs = nau8318_components;
+ links[id].num_codecs = ARRAY_SIZE(nau8318_components);
+ links[id].init = speaker_codec_init;
} else {
goto devm_err;
}
@@ -493,6 +504,9 @@ static struct snd_soc_dai_link *sof_card_dai_links_create(struct device *dev,
links[id].platforms = platform_component;
links[id].num_platforms = ARRAY_SIZE(platform_component);
links[id].dpcm_playback = 1;
+ /* feedback stream or firmware-generated echo reference */
+ links[id].dpcm_capture = 1;
+
links[id].no_pcm = 1;
links[id].cpus = &cpus[id];
links[id].num_cpus = 1;
@@ -618,7 +632,7 @@ static const struct platform_device_id board_ids[] = {
},
{
- .name = "adl_rt1019p_nau8825",
+ .name = "adl_rt1019p_8825",
.driver_data = (kernel_ulong_t)(SOF_NAU8825_SSP_CODEC(0) |
SOF_SPEAKER_AMP_PRESENT |
SOF_RT1019P_SPEAKER_AMP_PRESENT |
@@ -626,7 +640,7 @@ static const struct platform_device_id board_ids[] = {
SOF_NAU8825_NUM_HDMIDEV(4)),
},
{
- .name = "adl_max98373_nau8825",
+ .name = "adl_max98373_8825",
.driver_data = (kernel_ulong_t)(SOF_NAU8825_SSP_CODEC(0) |
SOF_SPEAKER_AMP_PRESENT |
SOF_MAX98373_SPEAKER_AMP_PRESENT |
@@ -637,7 +651,7 @@ static const struct platform_device_id board_ids[] = {
},
{
/* The limitation of length of char array, shorten the name */
- .name = "adl_mx98360a_nau8825",
+ .name = "adl_mx98360a_8825",
.driver_data = (kernel_ulong_t)(SOF_NAU8825_SSP_CODEC(0) |
SOF_SPEAKER_AMP_PRESENT |
SOF_MAX98360A_SPEAKER_AMP_PRESENT |
@@ -648,7 +662,7 @@ static const struct platform_device_id board_ids[] = {
},
{
- .name = "adl_rt1015p_nau8825",
+ .name = "adl_rt1015p_8825",
.driver_data = (kernel_ulong_t)(SOF_NAU8825_SSP_CODEC(0) |
SOF_SPEAKER_AMP_PRESENT |
SOF_RT1015P_SPEAKER_AMP_PRESENT |
@@ -657,6 +671,16 @@ static const struct platform_device_id board_ids[] = {
SOF_BT_OFFLOAD_SSP(2) |
SOF_SSP_BT_OFFLOAD_PRESENT),
},
+ {
+ .name = "adl_nau8318_8825",
+ .driver_data = (kernel_ulong_t)(SOF_NAU8825_SSP_CODEC(0) |
+ SOF_SPEAKER_AMP_PRESENT |
+ SOF_NAU8318_SPEAKER_AMP_PRESENT |
+ SOF_NAU8825_SSP_AMP(1) |
+ SOF_NAU8825_NUM_HDMIDEV(4) |
+ SOF_BT_OFFLOAD_SSP(2) |
+ SOF_SSP_BT_OFFLOAD_PRESENT),
+ },
{ }
};
MODULE_DEVICE_TABLE(platform, board_ids);
diff --git a/sound/soc/intel/boards/sof_rt5682.c b/sound/soc/intel/boards/sof_rt5682.c
index 2eabc4b0fafa..71a11d747622 100644
--- a/sound/soc/intel/boards/sof_rt5682.c
+++ b/sound/soc/intel/boards/sof_rt5682.c
@@ -761,8 +761,6 @@ static struct snd_soc_dai_link *sof_card_dai_links_create(struct device *dev,
links[id].num_codecs = ARRAY_SIZE(max_98373_components);
links[id].init = max_98373_spk_codec_init;
links[id].ops = &max_98373_ops;
- /* feedback stream */
- links[id].dpcm_capture = 1;
} else if (sof_rt5682_quirk &
SOF_MAX98360A_SPEAKER_AMP_PRESENT) {
max_98360a_dai_link(&links[id]);
@@ -789,6 +787,9 @@ static struct snd_soc_dai_link *sof_card_dai_links_create(struct device *dev,
links[id].platforms = platform_component;
links[id].num_platforms = ARRAY_SIZE(platform_component);
links[id].dpcm_playback = 1;
+ /* feedback stream or firmware-generated echo reference */
+ links[id].dpcm_capture = 1;
+
links[id].no_pcm = 1;
links[id].cpus = &cpus[id];
links[id].num_cpus = 1;
diff --git a/sound/soc/intel/boards/sof_ssp_amp.c b/sound/soc/intel/boards/sof_ssp_amp.c
index 94d25aeb6e7c..7b74f122e340 100644
--- a/sound/soc/intel/boards/sof_ssp_amp.c
+++ b/sound/soc/intel/boards/sof_ssp_amp.c
@@ -258,13 +258,12 @@ static struct snd_soc_dai_link *sof_card_dai_links_create(struct device *dev,
sof_rt1308_dai_link(&links[id]);
} else if (sof_ssp_amp_quirk & SOF_CS35L41_SPEAKER_AMP_PRESENT) {
cs35l41_set_dai_link(&links[id]);
-
- /* feedback from amplifier */
- links[id].dpcm_capture = 1;
}
links[id].platforms = platform_component;
links[id].num_platforms = ARRAY_SIZE(platform_component);
links[id].dpcm_playback = 1;
+ /* feedback from amplifier or firmware-generated echo reference */
+ links[id].dpcm_capture = 1;
links[id].no_pcm = 1;
links[id].cpus = &cpus[id];
links[id].num_cpus = 1;
diff --git a/sound/soc/intel/common/soc-acpi-intel-adl-match.c b/sound/soc/intel/common/soc-acpi-intel-adl-match.c
index 60aee56f94bd..56ee5fef66a8 100644
--- a/sound/soc/intel/common/soc-acpi-intel-adl-match.c
+++ b/sound/soc/intel/common/soc-acpi-intel-adl-match.c
@@ -450,6 +450,11 @@ static const struct snd_soc_acpi_codecs adl_lt6911_hdmi = {
.codecs = {"INTC10B0"}
};
+static const struct snd_soc_acpi_codecs adl_nau8318_amp = {
+ .num_codecs = 1,
+ .codecs = {"NVTN2012"}
+};
+
struct snd_soc_acpi_mach snd_soc_acpi_intel_adl_machines[] = {
{
.comp_ids = &adl_rt5682_rt5682s_hp,
@@ -474,21 +479,21 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_adl_machines[] = {
},
{
.id = "10508825",
- .drv_name = "adl_rt1019p_nau8825",
+ .drv_name = "adl_rt1019p_8825",
.machine_quirk = snd_soc_acpi_codec_list,
.quirk_data = &adl_rt1019p_amp,
.sof_tplg_filename = "sof-adl-rt1019-nau8825.tplg",
},
{
.id = "10508825",
- .drv_name = "adl_max98373_nau8825",
+ .drv_name = "adl_max98373_8825",
.machine_quirk = snd_soc_acpi_codec_list,
.quirk_data = &adl_max98373_amp,
.sof_tplg_filename = "sof-adl-max98373-nau8825.tplg",
},
{
.id = "10508825",
- .drv_name = "adl_mx98360a_nau8825",
+ .drv_name = "adl_mx98360a_8825",
.machine_quirk = snd_soc_acpi_codec_list,
.quirk_data = &adl_max98360a_amp,
.sof_tplg_filename = "sof-adl-max98360a-nau8825.tplg",
@@ -502,13 +507,20 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_adl_machines[] = {
},
{
.id = "10508825",
- .drv_name = "adl_rt1015p_nau8825",
+ .drv_name = "adl_rt1015p_8825",
.machine_quirk = snd_soc_acpi_codec_list,
.quirk_data = &adl_rt1015p_amp,
.sof_tplg_filename = "sof-adl-rt1015-nau8825.tplg",
},
{
.id = "10508825",
+ .drv_name = "adl_nau8318_8825",
+ .machine_quirk = snd_soc_acpi_codec_list,
+ .quirk_data = &adl_nau8318_amp,
+ .sof_tplg_filename = "sof-adl-nau8318-nau8825.tplg",
+ },
+ {
+ .id = "10508825",
.drv_name = "sof_nau8825",
.sof_tplg_filename = "sof-adl-nau8825.tplg",
},
diff --git a/sound/soc/intel/common/soc-acpi-intel-rpl-match.c b/sound/soc/intel/common/soc-acpi-intel-rpl-match.c
index 31b43116e3d8..07f96a11ea2f 100644
--- a/sound/soc/intel/common/soc-acpi-intel-rpl-match.c
+++ b/sound/soc/intel/common/soc-acpi-intel-rpl-match.c
@@ -203,6 +203,25 @@ static const struct snd_soc_acpi_link_adr rpl_sdw_rt711_link2_rt1316_link01_rt71
{}
};
+static const struct snd_soc_acpi_link_adr rpl_sdw_rt711_link2_rt1316_link01[] = {
+ {
+ .mask = BIT(2),
+ .num_adr = ARRAY_SIZE(rt711_sdca_2_adr),
+ .adr_d = rt711_sdca_2_adr,
+ },
+ {
+ .mask = BIT(0),
+ .num_adr = ARRAY_SIZE(rt1316_0_group2_adr),
+ .adr_d = rt1316_0_group2_adr,
+ },
+ {
+ .mask = BIT(1),
+ .num_adr = ARRAY_SIZE(rt1316_1_group2_adr),
+ .adr_d = rt1316_1_group2_adr,
+ },
+ {}
+};
+
static const struct snd_soc_acpi_link_adr rpl_sdw_rt711_link0_rt1318_link12_rt714_link3[] = {
{
.mask = BIT(0),
@@ -227,6 +246,25 @@ static const struct snd_soc_acpi_link_adr rpl_sdw_rt711_link0_rt1318_link12_rt71
{}
};
+static const struct snd_soc_acpi_link_adr rpl_sdw_rt711_link0_rt1318_link12[] = {
+ {
+ .mask = BIT(0),
+ .num_adr = ARRAY_SIZE(rt711_sdca_0_adr),
+ .adr_d = rt711_sdca_0_adr,
+ },
+ {
+ .mask = BIT(1),
+ .num_adr = ARRAY_SIZE(rt1318_1_group1_adr),
+ .adr_d = rt1318_1_group1_adr,
+ },
+ {
+ .mask = BIT(2),
+ .num_adr = ARRAY_SIZE(rt1318_2_group1_adr),
+ .adr_d = rt1318_2_group1_adr,
+ },
+ {}
+};
+
static const struct snd_soc_acpi_link_adr rpl_sdw_rt1316_link12_rt714_link0[] = {
{
.mask = BIT(1),
@@ -272,12 +310,24 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_rpl_sdw_machines[] = {
.sof_tplg_filename = "sof-rpl-rt711-l0-rt1318-l12-rt714-l3.tplg",
},
{
+ .link_mask = 0x7, /* rt711 on link0 & two rt1318s on link1 and link2 */
+ .links = rpl_sdw_rt711_link0_rt1318_link12,
+ .drv_name = "sof_sdw",
+ .sof_tplg_filename = "sof-rpl-rt711-l0-rt1318-l12.tplg",
+ },
+ {
.link_mask = 0x7, /* rt714 on link0 & two rt1316s on link1 and link2 */
.links = rpl_sdw_rt1316_link12_rt714_link0,
.drv_name = "sof_sdw",
.sof_tplg_filename = "sof-rpl-rt1316-l12-rt714-l0.tplg",
},
{
+ .link_mask = 0x7, /* rt711 on link2 & two rt1316s on link0 and link1 */
+ .links = rpl_sdw_rt711_link2_rt1316_link01,
+ .drv_name = "sof_sdw",
+ .sof_tplg_filename = "sof-rpl-rt711-l2-rt1316-l01.tplg",
+ },
+ {
.link_mask = 0x1, /* link0 required */
.links = rpl_rvp,
.drv_name = "sof_sdw",
diff --git a/sound/soc/mediatek/Kconfig b/sound/soc/mediatek/Kconfig
index 363fa4d47680..b027fba8233d 100644
--- a/sound/soc/mediatek/Kconfig
+++ b/sound/soc/mediatek/Kconfig
@@ -182,10 +182,12 @@ config SND_SOC_MT8186_MT6366_DA7219_MAX98357
If unsure select "N".
config SND_SOC_MT8186_MT6366_RT1019_RT5682S
- tristate "ASoC Audio driver for MT8186 with RT1019 RT5682S codec"
+ tristate "ASoC Audio driver for MT8186 with RT1019 RT5682S MAX98357A/MAX98360 codec"
depends on I2C && GPIOLIB
depends on SND_SOC_MT8186 && MTK_PMIC_WRAP
+ select SND_SOC_MAX98357A
select SND_SOC_MT6358
+ select SND_SOC_MAX98357A
select SND_SOC_RT1015P
select SND_SOC_RT5682S
select SND_SOC_BT_SCO
diff --git a/sound/soc/mediatek/mt8186/mt8186-mt6366-rt1019-rt5682s.c b/sound/soc/mediatek/mt8186/mt8186-mt6366-rt1019-rt5682s.c
index 8f77a0bc1dc8..af44e331dae8 100644
--- a/sound/soc/mediatek/mt8186/mt8186-mt6366-rt1019-rt5682s.c
+++ b/sound/soc/mediatek/mt8186/mt8186-mt6366-rt1019-rt5682s.c
@@ -1083,6 +1083,21 @@ static struct snd_soc_card mt8186_mt6366_rt1019_rt5682s_soc_card = {
.num_configs = ARRAY_SIZE(mt8186_mt6366_rt1019_rt5682s_codec_conf),
};
+static struct snd_soc_card mt8186_mt6366_rt5682s_max98360_soc_card = {
+ .name = "mt8186_rt5682s_max98360",
+ .owner = THIS_MODULE,
+ .dai_link = mt8186_mt6366_rt1019_rt5682s_dai_links,
+ .num_links = ARRAY_SIZE(mt8186_mt6366_rt1019_rt5682s_dai_links),
+ .controls = mt8186_mt6366_rt1019_rt5682s_controls,
+ .num_controls = ARRAY_SIZE(mt8186_mt6366_rt1019_rt5682s_controls),
+ .dapm_widgets = mt8186_mt6366_rt1019_rt5682s_widgets,
+ .num_dapm_widgets = ARRAY_SIZE(mt8186_mt6366_rt1019_rt5682s_widgets),
+ .dapm_routes = mt8186_mt6366_rt1019_rt5682s_routes,
+ .num_dapm_routes = ARRAY_SIZE(mt8186_mt6366_rt1019_rt5682s_routes),
+ .codec_conf = mt8186_mt6366_rt1019_rt5682s_codec_conf,
+ .num_configs = ARRAY_SIZE(mt8186_mt6366_rt1019_rt5682s_codec_conf),
+};
+
static int mt8186_mt6366_rt1019_rt5682s_dev_probe(struct platform_device *pdev)
{
struct snd_soc_card *card;
@@ -1232,9 +1247,14 @@ err_adsp_node:
#if IS_ENABLED(CONFIG_OF)
static const struct of_device_id mt8186_mt6366_rt1019_rt5682s_dt_match[] = {
- { .compatible = "mediatek,mt8186-mt6366-rt1019-rt5682s-sound",
+ {
+ .compatible = "mediatek,mt8186-mt6366-rt1019-rt5682s-sound",
.data = &mt8186_mt6366_rt1019_rt5682s_soc_card,
},
+ {
+ .compatible = "mediatek,mt8186-mt6366-rt5682s-max98360-sound",
+ .data = &mt8186_mt6366_rt5682s_max98360_soc_card,
+ },
{}
};
MODULE_DEVICE_TABLE(of, mt8186_mt6366_rt1019_rt5682s_dt_match);
diff --git a/sound/soc/qcom/Kconfig b/sound/soc/qcom/Kconfig
index 96a6d4731e6f..e7b00d1d9e99 100644
--- a/sound/soc/qcom/Kconfig
+++ b/sound/soc/qcom/Kconfig
@@ -2,7 +2,6 @@
menuconfig SND_SOC_QCOM
tristate "ASoC support for QCOM platforms"
depends on ARCH_QCOM || COMPILE_TEST
- imply SND_SOC_QCOM_COMMON
help
Say Y or M if you want to add support to use audio devices
in Qualcomm Technologies SOC-based platforms.
@@ -60,14 +59,16 @@ config SND_SOC_STORM
config SND_SOC_APQ8016_SBC
tristate "SoC Audio support for APQ8016 SBC platforms"
select SND_SOC_LPASS_APQ8016
- depends on SND_SOC_QCOM_COMMON
+ select SND_SOC_QCOM_COMMON
help
Support for Qualcomm Technologies LPASS audio block in
APQ8016 SOC-based systems.
Say Y if you want to use audio devices on MI2S.
config SND_SOC_QCOM_COMMON
- depends on SOUNDWIRE
+ tristate
+
+config SND_SOC_QCOM_SDW
tristate
config SND_SOC_QDSP6_COMMON
@@ -144,7 +145,7 @@ config SND_SOC_MSM8996
depends on QCOM_APR
depends on COMMON_CLK
select SND_SOC_QDSP6
- depends on SND_SOC_QCOM_COMMON
+ select SND_SOC_QCOM_COMMON
help
Support for Qualcomm Technologies LPASS audio block in
APQ8096 SoC-based systems.
@@ -155,7 +156,7 @@ config SND_SOC_SDM845
depends on QCOM_APR && I2C && SOUNDWIRE
depends on COMMON_CLK
select SND_SOC_QDSP6
- depends on SND_SOC_QCOM_COMMON
+ select SND_SOC_QCOM_COMMON
select SND_SOC_RT5663
select SND_SOC_MAX98927
imply SND_SOC_CROS_EC_CODEC
@@ -169,7 +170,8 @@ config SND_SOC_SM8250
depends on QCOM_APR && SOUNDWIRE
depends on COMMON_CLK
select SND_SOC_QDSP6
- depends on SND_SOC_QCOM_COMMON
+ select SND_SOC_QCOM_COMMON
+ select SND_SOC_QCOM_SDW
help
To add support for audio on Qualcomm Technologies Inc.
SM8250 SoC-based systems.
@@ -180,7 +182,8 @@ config SND_SOC_SC8280XP
depends on QCOM_APR && SOUNDWIRE
depends on COMMON_CLK
select SND_SOC_QDSP6
- depends on SND_SOC_QCOM_COMMON
+ select SND_SOC_QCOM_COMMON
+ select SND_SOC_QCOM_SDW
help
To add support for audio on Qualcomm Technologies Inc.
SC8280XP SoC-based systems.
@@ -190,7 +193,7 @@ config SND_SOC_SC7180
tristate "SoC Machine driver for SC7180 boards"
depends on I2C && GPIOLIB
depends on SOUNDWIRE || SOUNDWIRE=n
- depends on SND_SOC_QCOM_COMMON
+ select SND_SOC_QCOM_COMMON
select SND_SOC_LPASS_SC7180
select SND_SOC_MAX98357A
select SND_SOC_RT5682_I2C
@@ -204,7 +207,7 @@ config SND_SOC_SC7180
config SND_SOC_SC7280
tristate "SoC Machine driver for SC7280 boards"
depends on I2C && SOUNDWIRE
- depends on SND_SOC_QCOM_COMMON
+ select SND_SOC_QCOM_COMMON
select SND_SOC_LPASS_SC7280
select SND_SOC_MAX98357A
select SND_SOC_WCD938X_SDW
diff --git a/sound/soc/qcom/Makefile b/sound/soc/qcom/Makefile
index 8b97172cf990..254350d9dc06 100644
--- a/sound/soc/qcom/Makefile
+++ b/sound/soc/qcom/Makefile
@@ -28,6 +28,7 @@ snd-soc-sdm845-objs := sdm845.o
snd-soc-sm8250-objs := sm8250.o
snd-soc-sc8280xp-objs := sc8280xp.o
snd-soc-qcom-common-objs := common.o
+snd-soc-qcom-sdw-objs := sdw.o
obj-$(CONFIG_SND_SOC_STORM) += snd-soc-storm.o
obj-$(CONFIG_SND_SOC_APQ8016_SBC) += snd-soc-apq8016-sbc.o
@@ -38,6 +39,7 @@ obj-$(CONFIG_SND_SOC_SC8280XP) += snd-soc-sc8280xp.o
obj-$(CONFIG_SND_SOC_SDM845) += snd-soc-sdm845.o
obj-$(CONFIG_SND_SOC_SM8250) += snd-soc-sm8250.o
obj-$(CONFIG_SND_SOC_QCOM_COMMON) += snd-soc-qcom-common.o
+obj-$(CONFIG_SND_SOC_QCOM_SDW) += snd-soc-qcom-sdw.o
#DSP lib
obj-$(CONFIG_SND_SOC_QDSP6) += qdsp6/
diff --git a/sound/soc/qcom/common.c b/sound/soc/qcom/common.c
index 49c74c1662a3..96fe80241fb4 100644
--- a/sound/soc/qcom/common.c
+++ b/sound/soc/qcom/common.c
@@ -180,120 +180,6 @@ err_put_np:
}
EXPORT_SYMBOL_GPL(qcom_snd_parse_of);
-int qcom_snd_sdw_prepare(struct snd_pcm_substream *substream,
- struct sdw_stream_runtime *sruntime,
- bool *stream_prepared)
-{
- struct snd_soc_pcm_runtime *rtd = substream->private_data;
- struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0);
- int ret;
-
- if (!sruntime)
- return 0;
-
- switch (cpu_dai->id) {
- case WSA_CODEC_DMA_RX_0:
- case WSA_CODEC_DMA_RX_1:
- case RX_CODEC_DMA_RX_0:
- case RX_CODEC_DMA_RX_1:
- case TX_CODEC_DMA_TX_0:
- case TX_CODEC_DMA_TX_1:
- case TX_CODEC_DMA_TX_2:
- case TX_CODEC_DMA_TX_3:
- break;
- default:
- return 0;
- }
-
- if (*stream_prepared) {
- sdw_disable_stream(sruntime);
- sdw_deprepare_stream(sruntime);
- *stream_prepared = false;
- }
-
- ret = sdw_prepare_stream(sruntime);
- if (ret)
- return ret;
-
- /**
- * NOTE: there is a strict hw requirement about the ordering of port
- * enables and actual WSA881x PA enable. PA enable should only happen
- * after soundwire ports are enabled if not DC on the line is
- * accumulated resulting in Click/Pop Noise
- * PA enable/mute are handled as part of codec DAPM and digital mute.
- */
-
- ret = sdw_enable_stream(sruntime);
- if (ret) {
- sdw_deprepare_stream(sruntime);
- return ret;
- }
- *stream_prepared = true;
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(qcom_snd_sdw_prepare);
-
-int qcom_snd_sdw_hw_params(struct snd_pcm_substream *substream,
- struct snd_pcm_hw_params *params,
- struct sdw_stream_runtime **psruntime)
-{
- struct snd_soc_pcm_runtime *rtd = substream->private_data;
- struct snd_soc_dai *codec_dai;
- struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0);
- struct sdw_stream_runtime *sruntime;
- int i;
-
- switch (cpu_dai->id) {
- case WSA_CODEC_DMA_RX_0:
- case RX_CODEC_DMA_RX_0:
- case RX_CODEC_DMA_RX_1:
- case TX_CODEC_DMA_TX_0:
- case TX_CODEC_DMA_TX_1:
- case TX_CODEC_DMA_TX_2:
- case TX_CODEC_DMA_TX_3:
- for_each_rtd_codec_dais(rtd, i, codec_dai) {
- sruntime = snd_soc_dai_get_stream(codec_dai, substream->stream);
- if (sruntime != ERR_PTR(-ENOTSUPP))
- *psruntime = sruntime;
- }
- break;
- }
-
- return 0;
-
-}
-EXPORT_SYMBOL_GPL(qcom_snd_sdw_hw_params);
-
-int qcom_snd_sdw_hw_free(struct snd_pcm_substream *substream,
- struct sdw_stream_runtime *sruntime, bool *stream_prepared)
-{
- struct snd_soc_pcm_runtime *rtd = substream->private_data;
- struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0);
-
- switch (cpu_dai->id) {
- case WSA_CODEC_DMA_RX_0:
- case WSA_CODEC_DMA_RX_1:
- case RX_CODEC_DMA_RX_0:
- case RX_CODEC_DMA_RX_1:
- case TX_CODEC_DMA_TX_0:
- case TX_CODEC_DMA_TX_1:
- case TX_CODEC_DMA_TX_2:
- case TX_CODEC_DMA_TX_3:
- if (sruntime && *stream_prepared) {
- sdw_disable_stream(sruntime);
- sdw_deprepare_stream(sruntime);
- *stream_prepared = false;
- }
- break;
- default:
- break;
- }
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(qcom_snd_sdw_hw_free);
-
int qcom_snd_wcd_jack_setup(struct snd_soc_pcm_runtime *rtd,
struct snd_soc_jack *jack, bool *jack_setup)
{
diff --git a/sound/soc/qcom/common.h b/sound/soc/qcom/common.h
index 3ef5bb6d12df..d7f80ee5ae26 100644
--- a/sound/soc/qcom/common.h
+++ b/sound/soc/qcom/common.h
@@ -5,19 +5,9 @@
#define __QCOM_SND_COMMON_H__
#include <sound/soc.h>
-#include <linux/soundwire/sdw.h>
int qcom_snd_parse_of(struct snd_soc_card *card);
int qcom_snd_wcd_jack_setup(struct snd_soc_pcm_runtime *rtd,
struct snd_soc_jack *jack, bool *jack_setup);
-int qcom_snd_sdw_prepare(struct snd_pcm_substream *substream,
- struct sdw_stream_runtime *runtime,
- bool *stream_prepared);
-int qcom_snd_sdw_hw_params(struct snd_pcm_substream *substream,
- struct snd_pcm_hw_params *params,
- struct sdw_stream_runtime **psruntime);
-int qcom_snd_sdw_hw_free(struct snd_pcm_substream *substream,
- struct sdw_stream_runtime *sruntime,
- bool *stream_prepared);
#endif
diff --git a/sound/soc/qcom/lpass-cpu.c b/sound/soc/qcom/lpass-cpu.c
index 54353842dc07..dbdaaa85ce48 100644
--- a/sound/soc/qcom/lpass-cpu.c
+++ b/sound/soc/qcom/lpass-cpu.c
@@ -1037,10 +1037,11 @@ static void of_lpass_cpu_parse_dai_data(struct device *dev,
struct lpass_data *data)
{
struct device_node *node;
- int ret, id;
+ int ret, i, id;
/* Allow all channels by default for backwards compatibility */
- for (id = 0; id < data->variant->num_dai; id++) {
+ for (i = 0; i < data->variant->num_dai; i++) {
+ id = data->variant->dai_driver[i].id;
data->mi2s_playback_sd_mode[id] = LPAIF_I2SCTL_MODE_8CH;
data->mi2s_capture_sd_mode[id] = LPAIF_I2SCTL_MODE_8CH;
}
diff --git a/sound/soc/qcom/sc8280xp.c b/sound/soc/qcom/sc8280xp.c
index ade44ad7c585..14d9fea33d16 100644
--- a/sound/soc/qcom/sc8280xp.c
+++ b/sound/soc/qcom/sc8280xp.c
@@ -12,6 +12,7 @@
#include <linux/input-event-codes.h>
#include "qdsp6/q6afe.h"
#include "common.h"
+#include "sdw.h"
#define DRIVER_NAME "sc8280xp"
diff --git a/sound/soc/qcom/sdw.c b/sound/soc/qcom/sdw.c
new file mode 100644
index 000000000000..10249519a39e
--- /dev/null
+++ b/sound/soc/qcom/sdw.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018, Linaro Limited.
+// Copyright (c) 2018, The Linux Foundation. All rights reserved.
+
+#include <linux/module.h>
+#include <sound/soc.h>
+#include "qdsp6/q6afe.h"
+#include "sdw.h"
+
+int qcom_snd_sdw_prepare(struct snd_pcm_substream *substream,
+ struct sdw_stream_runtime *sruntime,
+ bool *stream_prepared)
+{
+ struct snd_soc_pcm_runtime *rtd = substream->private_data;
+ struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0);
+ int ret;
+
+ if (!sruntime)
+ return 0;
+
+ switch (cpu_dai->id) {
+ case WSA_CODEC_DMA_RX_0:
+ case WSA_CODEC_DMA_RX_1:
+ case RX_CODEC_DMA_RX_0:
+ case RX_CODEC_DMA_RX_1:
+ case TX_CODEC_DMA_TX_0:
+ case TX_CODEC_DMA_TX_1:
+ case TX_CODEC_DMA_TX_2:
+ case TX_CODEC_DMA_TX_3:
+ break;
+ default:
+ return 0;
+ }
+
+ if (*stream_prepared) {
+ sdw_disable_stream(sruntime);
+ sdw_deprepare_stream(sruntime);
+ *stream_prepared = false;
+ }
+
+ ret = sdw_prepare_stream(sruntime);
+ if (ret)
+ return ret;
+
+ /**
+ * NOTE: there is a strict hw requirement about the ordering of port
+ * enables and actual WSA881x PA enable. PA enable should only happen
+ * after soundwire ports are enabled if not DC on the line is
+ * accumulated resulting in Click/Pop Noise
+ * PA enable/mute are handled as part of codec DAPM and digital mute.
+ */
+
+ ret = sdw_enable_stream(sruntime);
+ if (ret) {
+ sdw_deprepare_stream(sruntime);
+ return ret;
+ }
+ *stream_prepared = true;
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(qcom_snd_sdw_prepare);
+
+int qcom_snd_sdw_hw_params(struct snd_pcm_substream *substream,
+ struct snd_pcm_hw_params *params,
+ struct sdw_stream_runtime **psruntime)
+{
+ struct snd_soc_pcm_runtime *rtd = substream->private_data;
+ struct snd_soc_dai *codec_dai;
+ struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0);
+ struct sdw_stream_runtime *sruntime;
+ int i;
+
+ switch (cpu_dai->id) {
+ case WSA_CODEC_DMA_RX_0:
+ case RX_CODEC_DMA_RX_0:
+ case RX_CODEC_DMA_RX_1:
+ case TX_CODEC_DMA_TX_0:
+ case TX_CODEC_DMA_TX_1:
+ case TX_CODEC_DMA_TX_2:
+ case TX_CODEC_DMA_TX_3:
+ for_each_rtd_codec_dais(rtd, i, codec_dai) {
+ sruntime = snd_soc_dai_get_stream(codec_dai, substream->stream);
+ if (sruntime != ERR_PTR(-ENOTSUPP))
+ *psruntime = sruntime;
+ }
+ break;
+ }
+
+ return 0;
+
+}
+EXPORT_SYMBOL_GPL(qcom_snd_sdw_hw_params);
+
+int qcom_snd_sdw_hw_free(struct snd_pcm_substream *substream,
+ struct sdw_stream_runtime *sruntime, bool *stream_prepared)
+{
+ struct snd_soc_pcm_runtime *rtd = substream->private_data;
+ struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0);
+
+ switch (cpu_dai->id) {
+ case WSA_CODEC_DMA_RX_0:
+ case WSA_CODEC_DMA_RX_1:
+ case RX_CODEC_DMA_RX_0:
+ case RX_CODEC_DMA_RX_1:
+ case TX_CODEC_DMA_TX_0:
+ case TX_CODEC_DMA_TX_1:
+ case TX_CODEC_DMA_TX_2:
+ case TX_CODEC_DMA_TX_3:
+ if (sruntime && *stream_prepared) {
+ sdw_disable_stream(sruntime);
+ sdw_deprepare_stream(sruntime);
+ *stream_prepared = false;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(qcom_snd_sdw_hw_free);
+MODULE_LICENSE("GPL v2");
diff --git a/sound/soc/qcom/sdw.h b/sound/soc/qcom/sdw.h
new file mode 100644
index 000000000000..d74cbb84da13
--- /dev/null
+++ b/sound/soc/qcom/sdw.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (c) 2018, The Linux Foundation. All rights reserved.
+
+#ifndef __QCOM_SND_SDW_H__
+#define __QCOM_SND_SDW_H__
+
+#include <linux/soundwire/sdw.h>
+
+int qcom_snd_sdw_prepare(struct snd_pcm_substream *substream,
+ struct sdw_stream_runtime *runtime,
+ bool *stream_prepared);
+int qcom_snd_sdw_hw_params(struct snd_pcm_substream *substream,
+ struct snd_pcm_hw_params *params,
+ struct sdw_stream_runtime **psruntime);
+int qcom_snd_sdw_hw_free(struct snd_pcm_substream *substream,
+ struct sdw_stream_runtime *sruntime,
+ bool *stream_prepared);
+#endif
diff --git a/sound/soc/qcom/sm8250.c b/sound/soc/qcom/sm8250.c
index 8dbe9ef41b1c..9626a9ef78c2 100644
--- a/sound/soc/qcom/sm8250.c
+++ b/sound/soc/qcom/sm8250.c
@@ -12,6 +12,7 @@
#include <linux/input-event-codes.h>
#include "qdsp6/q6afe.h"
#include "common.h"
+#include "sdw.h"
#define DRIVER_NAME "sm8250"
#define MI2S_BCLK_RATE 1536000
diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c
index c3be24b2fac5..a79a2fb260b8 100644
--- a/sound/soc/soc-topology.c
+++ b/sound/soc/soc-topology.c
@@ -1401,13 +1401,17 @@ static int soc_tplg_dapm_widget_create(struct soc_tplg *tplg,
template.num_kcontrols = le32_to_cpu(w->num_kcontrols);
kc = devm_kcalloc(tplg->dev, le32_to_cpu(w->num_kcontrols), sizeof(*kc), GFP_KERNEL);
- if (!kc)
+ if (!kc) {
+ ret = -ENOMEM;
goto hdr_err;
+ }
kcontrol_type = devm_kcalloc(tplg->dev, le32_to_cpu(w->num_kcontrols), sizeof(unsigned int),
GFP_KERNEL);
- if (!kcontrol_type)
+ if (!kcontrol_type) {
+ ret = -ENOMEM;
goto hdr_err;
+ }
for (i = 0; i < le32_to_cpu(w->num_kcontrols); i++) {
control_hdr = (struct snd_soc_tplg_ctl_hdr *)tplg->pos;
diff --git a/sound/soc/sof/amd/acp.c b/sound/soc/sof/amd/acp.c
index 6bd2888fbb66..d5ccd4d09278 100644
--- a/sound/soc/sof/amd/acp.c
+++ b/sound/soc/sof/amd/acp.c
@@ -318,7 +318,6 @@ static irqreturn_t acp_irq_thread(int irq, void *context)
{
struct snd_sof_dev *sdev = context;
const struct sof_amd_acp_desc *desc = get_chip_info(sdev->pdata);
- unsigned int base = desc->dsp_intr_base;
unsigned int val, count = ACP_HW_SEM_RETRY_COUNT;
val = snd_sof_dsp_read(sdev, ACP_DSP_BAR, desc->ext_intr_stat);
@@ -328,28 +327,20 @@ static irqreturn_t acp_irq_thread(int irq, void *context)
return IRQ_HANDLED;
}
- val = snd_sof_dsp_read(sdev, ACP_DSP_BAR, base + DSP_SW_INTR_STAT_OFFSET);
- if (val & ACP_DSP_TO_HOST_IRQ) {
- while (snd_sof_dsp_read(sdev, ACP_DSP_BAR, desc->hw_semaphore_offset)) {
- /* Wait until acquired HW Semaphore lock or timeout */
- count--;
- if (!count) {
- dev_err(sdev->dev, "%s: Failed to acquire HW lock\n", __func__);
- return IRQ_NONE;
- }
+ while (snd_sof_dsp_read(sdev, ACP_DSP_BAR, desc->hw_semaphore_offset)) {
+ /* Wait until acquired HW Semaphore lock or timeout */
+ count--;
+ if (!count) {
+ dev_err(sdev->dev, "%s: Failed to acquire HW lock\n", __func__);
+ return IRQ_NONE;
}
-
- sof_ops(sdev)->irq_thread(irq, sdev);
- val |= ACP_DSP_TO_HOST_IRQ;
- snd_sof_dsp_write(sdev, ACP_DSP_BAR, base + DSP_SW_INTR_STAT_OFFSET, val);
-
- /* Unlock or Release HW Semaphore */
- snd_sof_dsp_write(sdev, ACP_DSP_BAR, desc->hw_semaphore_offset, 0x0);
-
- return IRQ_HANDLED;
}
- return IRQ_NONE;
+ sof_ops(sdev)->irq_thread(irq, sdev);
+ /* Unlock or Release HW Semaphore */
+ snd_sof_dsp_write(sdev, ACP_DSP_BAR, desc->hw_semaphore_offset, 0x0);
+
+ return IRQ_HANDLED;
};
static irqreturn_t acp_irq_handler(int irq, void *dev_id)
@@ -360,8 +351,11 @@ static irqreturn_t acp_irq_handler(int irq, void *dev_id)
unsigned int val;
val = snd_sof_dsp_read(sdev, ACP_DSP_BAR, base + DSP_SW_INTR_STAT_OFFSET);
- if (val)
+ if (val) {
+ val |= ACP_DSP_TO_HOST_IRQ;
+ snd_sof_dsp_write(sdev, ACP_DSP_BAR, base + DSP_SW_INTR_STAT_OFFSET, val);
return IRQ_WAKE_THREAD;
+ }
return IRQ_NONE;
}
diff --git a/sound/soc/sof/debug.c b/sound/soc/sof/debug.c
index d9a3ce7b69e1..ade0507328af 100644
--- a/sound/soc/sof/debug.c
+++ b/sound/soc/sof/debug.c
@@ -353,7 +353,9 @@ int snd_sof_dbg_init(struct snd_sof_dev *sdev)
return err;
}
- return 0;
+ return snd_sof_debugfs_buf_item(sdev, &sdev->fw_state,
+ sizeof(sdev->fw_state),
+ "fw_state", 0444);
}
EXPORT_SYMBOL_GPL(snd_sof_dbg_init);
diff --git a/sound/soc/sof/intel/hda-dai.c b/sound/soc/sof/intel/hda-dai.c
index 1c3d4887aa30..a642c3067ec5 100644
--- a/sound/soc/sof/intel/hda-dai.c
+++ b/sound/soc/sof/intel/hda-dai.c
@@ -216,6 +216,10 @@ static int hda_link_dma_hw_params(struct snd_pcm_substream *substream,
sdev = snd_soc_component_get_drvdata(cpu_dai->component);
bus = sof_to_bus(sdev);
+ hlink = snd_hdac_ext_bus_get_hlink_by_name(bus, codec_dai->component->name);
+ if (!hlink)
+ return -EINVAL;
+
hext_stream = snd_soc_dai_get_dma_data(cpu_dai, substream);
if (!hext_stream) {
hext_stream = hda_link_stream_assign(bus, substream);
@@ -225,10 +229,6 @@ static int hda_link_dma_hw_params(struct snd_pcm_substream *substream,
snd_soc_dai_set_dma_data(cpu_dai, substream, (void *)hext_stream);
}
- hlink = snd_hdac_ext_bus_get_hlink_by_name(bus, codec_dai->component->name);
- if (!hlink)
- return -EINVAL;
-
/* set the hdac_stream in the codec dai */
snd_soc_dai_set_stream(codec_dai, hdac_stream(hext_stream), substream->stream);
diff --git a/sound/soc/sof/ipc4-mtrace.c b/sound/soc/sof/ipc4-mtrace.c
index 70dea8ae706e..0ec6ef681012 100644
--- a/sound/soc/sof/ipc4-mtrace.c
+++ b/sound/soc/sof/ipc4-mtrace.c
@@ -344,9 +344,10 @@ static ssize_t sof_ipc4_priority_mask_dfs_write(struct file *file,
size_t count, loff_t *ppos)
{
struct sof_mtrace_priv *priv = file->private_data;
- int id, ret;
+ unsigned int id;
char *buf;
u32 mask;
+ int ret;
/*
* To update Nth mask entry, write:
@@ -357,9 +358,9 @@ static ssize_t sof_ipc4_priority_mask_dfs_write(struct file *file,
if (IS_ERR(buf))
return PTR_ERR(buf);
- ret = sscanf(buf, "%d,0x%x", &id, &mask);
+ ret = sscanf(buf, "%u,0x%x", &id, &mask);
if (ret != 2) {
- ret = sscanf(buf, "%d,%x", &id, &mask);
+ ret = sscanf(buf, "%u,%x", &id, &mask);
if (ret != 2) {
ret = -EINVAL;
goto out;
diff --git a/sound/soc/sof/ops.h b/sound/soc/sof/ops.h
index c52752250565..3b3f3cf7af38 100644
--- a/sound/soc/sof/ops.h
+++ b/sound/soc/sof/ops.h
@@ -357,7 +357,7 @@ static inline u64 snd_sof_dsp_read64(struct snd_sof_dev *sdev, u32 bar,
}
static inline void snd_sof_dsp_update8(struct snd_sof_dev *sdev, u32 bar,
- u32 offset, u8 value, u8 mask)
+ u32 offset, u8 mask, u8 value)
{
u8 reg;
diff --git a/sound/soc/sof/pm.c b/sound/soc/sof/pm.c
index df740be645e8..8722bbd7fd3d 100644
--- a/sound/soc/sof/pm.c
+++ b/sound/soc/sof/pm.c
@@ -182,7 +182,7 @@ static int sof_suspend(struct device *dev, bool runtime_suspend)
const struct sof_ipc_pm_ops *pm_ops = sdev->ipc->ops->pm;
const struct sof_ipc_tplg_ops *tplg_ops = sdev->ipc->ops->tplg;
pm_message_t pm_state;
- u32 target_state = 0;
+ u32 target_state = snd_sof_dsp_power_target(sdev);
int ret;
/* do nothing if dsp suspend callback is not set */
@@ -192,6 +192,9 @@ static int sof_suspend(struct device *dev, bool runtime_suspend)
if (runtime_suspend && !sof_ops(sdev)->runtime_suspend)
return 0;
+ if (tplg_ops && tplg_ops->tear_down_all_pipelines)
+ tplg_ops->tear_down_all_pipelines(sdev, false);
+
if (sdev->fw_state != SOF_FW_BOOT_COMPLETE)
goto suspend;
@@ -206,7 +209,6 @@ static int sof_suspend(struct device *dev, bool runtime_suspend)
}
}
- target_state = snd_sof_dsp_power_target(sdev);
pm_state.event = target_state;
/* Skip to platform-specific suspend if DSP is entering D0 */
@@ -217,9 +219,6 @@ static int sof_suspend(struct device *dev, bool runtime_suspend)
goto suspend;
}
- if (tplg_ops->tear_down_all_pipelines)
- tplg_ops->tear_down_all_pipelines(sdev, false);
-
/* suspend DMA trace */
sof_fw_trace_suspend(sdev, pm_state);
diff --git a/sound/soc/sof/sof-audio.c b/sound/soc/sof/sof-audio.c
index 7306a2649857..865c367eb2f2 100644
--- a/sound/soc/sof/sof-audio.c
+++ b/sound/soc/sof/sof-audio.c
@@ -271,9 +271,9 @@ sof_unprepare_widgets_in_path(struct snd_sof_dev *sdev, struct snd_soc_dapm_widg
struct snd_sof_widget *swidget = widget->dobj.private;
struct snd_soc_dapm_path *p;
- /* return if the widget is in use or if it is already unprepared */
- if (!swidget->prepared || swidget->use_count > 1)
- return;
+ /* skip if the widget is in use or if it is already unprepared */
+ if (!swidget || !swidget->prepared || swidget->use_count > 0)
+ goto sink_unprepare;
if (widget_ops[widget->id].ipc_unprepare)
/* unprepare the source widget */
@@ -281,6 +281,7 @@ sof_unprepare_widgets_in_path(struct snd_sof_dev *sdev, struct snd_soc_dapm_widg
swidget->prepared = false;
+sink_unprepare:
/* unprepare all widgets in the sink paths */
snd_soc_dapm_widget_for_each_sink_path(widget, p) {
if (!p->walking && p->sink->dobj.private) {
@@ -303,7 +304,7 @@ sof_prepare_widgets_in_path(struct snd_sof_dev *sdev, struct snd_soc_dapm_widget
struct snd_soc_dapm_path *p;
int ret;
- if (!widget_ops[widget->id].ipc_prepare || swidget->prepared)
+ if (!swidget || !widget_ops[widget->id].ipc_prepare || swidget->prepared)
goto sink_prepare;
/* prepare the source widget */
@@ -326,7 +327,8 @@ sink_prepare:
p->walking = false;
if (ret < 0) {
/* unprepare the source widget */
- if (widget_ops[widget->id].ipc_unprepare && swidget->prepared) {
+ if (widget_ops[widget->id].ipc_unprepare &&
+ swidget && swidget->prepared) {
widget_ops[widget->id].ipc_unprepare(swidget);
swidget->prepared = false;
}
@@ -429,11 +431,11 @@ sof_walk_widgets_in_order(struct snd_sof_dev *sdev, struct snd_soc_dapm_widget_l
for_each_dapm_widgets(list, i, widget) {
/* starting widget for playback is AIF type */
- if (dir == SNDRV_PCM_STREAM_PLAYBACK && !WIDGET_IS_AIF(widget->id))
+ if (dir == SNDRV_PCM_STREAM_PLAYBACK && widget->id != snd_soc_dapm_aif_in)
continue;
/* starting widget for capture is DAI type */
- if (dir == SNDRV_PCM_STREAM_CAPTURE && !WIDGET_IS_DAI(widget->id))
+ if (dir == SNDRV_PCM_STREAM_CAPTURE && widget->id != snd_soc_dapm_dai_out)
continue;
switch (op) {
diff --git a/sound/synth/emux/emux_nrpn.c b/sound/synth/emux/emux_nrpn.c
index 8056422ed7c5..0d6b82ae2955 100644
--- a/sound/synth/emux/emux_nrpn.c
+++ b/sound/synth/emux/emux_nrpn.c
@@ -349,6 +349,9 @@ int
snd_emux_xg_control(struct snd_emux_port *port, struct snd_midi_channel *chan,
int param)
{
+ if (param >= ARRAY_SIZE(chan->control))
+ return -EINVAL;
+
return send_converted_effect(xg_effects, ARRAY_SIZE(xg_effects),
port, chan, param,
chan->control[param],
diff --git a/sound/usb/implicit.c b/sound/usb/implicit.c
index 41ac7185b42b..4727043fd745 100644
--- a/sound/usb/implicit.c
+++ b/sound/usb/implicit.c
@@ -471,7 +471,7 @@ snd_usb_find_implicit_fb_sync_format(struct snd_usb_audio *chip,
subs = find_matching_substream(chip, stream, target->sync_ep,
target->fmt_type);
if (!subs)
- return sync_fmt;
+ goto end;
high_score = 0;
list_for_each_entry(fp, &subs->fmt_list, list) {
@@ -485,6 +485,7 @@ snd_usb_find_implicit_fb_sync_format(struct snd_usb_audio *chip,
}
}
+ end:
if (fixed_rate)
*fixed_rate = snd_usb_pcm_has_fixed_rate(subs);
return sync_fmt;
diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c
index 99a66d0ef5b2..d959da7a1afb 100644
--- a/sound/usb/pcm.c
+++ b/sound/usb/pcm.c
@@ -160,9 +160,12 @@ find_substream_format(struct snd_usb_substream *subs,
bool snd_usb_pcm_has_fixed_rate(struct snd_usb_substream *subs)
{
const struct audioformat *fp;
- struct snd_usb_audio *chip = subs->stream->chip;
+ struct snd_usb_audio *chip;
int rate = -1;
+ if (!subs)
+ return false;
+ chip = subs->stream->chip;
if (!(chip->quirk_flags & QUIRK_FLAG_FIXED_RATE))
return false;
list_for_each_entry(fp, &subs->fmt_list, list) {
@@ -525,6 +528,8 @@ static int snd_usb_hw_params(struct snd_pcm_substream *substream,
if (snd_usb_endpoint_compatible(chip, subs->data_endpoint,
fmt, hw_params))
goto unlock;
+ if (stop_endpoints(subs, false))
+ sync_pending_stops(subs);
close_endpoints(chip, subs);
}
@@ -787,11 +792,27 @@ static int apply_hw_params_minmax(struct snd_interval *it, unsigned int rmin,
return changed;
}
+/* get the specified endpoint object that is being used by other streams
+ * (i.e. the parameter is locked)
+ */
+static const struct snd_usb_endpoint *
+get_endpoint_in_use(struct snd_usb_audio *chip, int endpoint,
+ const struct snd_usb_endpoint *ref_ep)
+{
+ const struct snd_usb_endpoint *ep;
+
+ ep = snd_usb_get_endpoint(chip, endpoint);
+ if (ep && ep->cur_audiofmt && (ep != ref_ep || ep->opened > 1))
+ return ep;
+ return NULL;
+}
+
static int hw_rule_rate(struct snd_pcm_hw_params *params,
struct snd_pcm_hw_rule *rule)
{
struct snd_usb_substream *subs = rule->private;
struct snd_usb_audio *chip = subs->stream->chip;
+ const struct snd_usb_endpoint *ep;
const struct audioformat *fp;
struct snd_interval *it = hw_param_interval(params, SNDRV_PCM_HW_PARAM_RATE);
unsigned int rmin, rmax, r;
@@ -803,6 +824,29 @@ static int hw_rule_rate(struct snd_pcm_hw_params *params,
list_for_each_entry(fp, &subs->fmt_list, list) {
if (!hw_check_valid_format(subs, params, fp))
continue;
+
+ ep = get_endpoint_in_use(chip, fp->endpoint,
+ subs->data_endpoint);
+ if (ep) {
+ hwc_debug("rate limit %d for ep#%x\n",
+ ep->cur_rate, fp->endpoint);
+ rmin = min(rmin, ep->cur_rate);
+ rmax = max(rmax, ep->cur_rate);
+ continue;
+ }
+
+ if (fp->implicit_fb) {
+ ep = get_endpoint_in_use(chip, fp->sync_ep,
+ subs->sync_endpoint);
+ if (ep) {
+ hwc_debug("rate limit %d for sync_ep#%x\n",
+ ep->cur_rate, fp->sync_ep);
+ rmin = min(rmin, ep->cur_rate);
+ rmax = max(rmax, ep->cur_rate);
+ continue;
+ }
+ }
+
r = snd_usb_endpoint_get_clock_rate(chip, fp->clock);
if (r > 0) {
if (!snd_interval_test(it, r))
@@ -872,6 +916,8 @@ static int hw_rule_format(struct snd_pcm_hw_params *params,
struct snd_pcm_hw_rule *rule)
{
struct snd_usb_substream *subs = rule->private;
+ struct snd_usb_audio *chip = subs->stream->chip;
+ const struct snd_usb_endpoint *ep;
const struct audioformat *fp;
struct snd_mask *fmt = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT);
u64 fbits;
@@ -881,6 +927,27 @@ static int hw_rule_format(struct snd_pcm_hw_params *params,
list_for_each_entry(fp, &subs->fmt_list, list) {
if (!hw_check_valid_format(subs, params, fp))
continue;
+
+ ep = get_endpoint_in_use(chip, fp->endpoint,
+ subs->data_endpoint);
+ if (ep) {
+ hwc_debug("format limit %d for ep#%x\n",
+ ep->cur_format, fp->endpoint);
+ fbits |= pcm_format_to_bits(ep->cur_format);
+ continue;
+ }
+
+ if (fp->implicit_fb) {
+ ep = get_endpoint_in_use(chip, fp->sync_ep,
+ subs->sync_endpoint);
+ if (ep) {
+ hwc_debug("format limit %d for sync_ep#%x\n",
+ ep->cur_format, fp->sync_ep);
+ fbits |= pcm_format_to_bits(ep->cur_format);
+ continue;
+ }
+ }
+
fbits |= fp->formats;
}
return apply_hw_params_format_bits(fmt, fbits);
@@ -913,98 +980,95 @@ static int hw_rule_period_time(struct snd_pcm_hw_params *params,
return apply_hw_params_minmax(it, pmin, UINT_MAX);
}
-/* get the EP or the sync EP for implicit fb when it's already set up */
-static const struct snd_usb_endpoint *
-get_sync_ep_from_substream(struct snd_usb_substream *subs)
-{
- struct snd_usb_audio *chip = subs->stream->chip;
- const struct audioformat *fp;
- const struct snd_usb_endpoint *ep;
-
- list_for_each_entry(fp, &subs->fmt_list, list) {
- ep = snd_usb_get_endpoint(chip, fp->endpoint);
- if (ep && ep->cur_audiofmt) {
- /* if EP is already opened solely for this substream,
- * we still allow us to change the parameter; otherwise
- * this substream has to follow the existing parameter
- */
- if (ep->cur_audiofmt != subs->cur_audiofmt || ep->opened > 1)
- return ep;
- }
- if (!fp->implicit_fb)
- continue;
- /* for the implicit fb, check the sync ep as well */
- ep = snd_usb_get_endpoint(chip, fp->sync_ep);
- if (ep && ep->cur_audiofmt)
- return ep;
- }
- return NULL;
-}
-
/* additional hw constraints for implicit feedback mode */
-static int hw_rule_format_implicit_fb(struct snd_pcm_hw_params *params,
- struct snd_pcm_hw_rule *rule)
-{
- struct snd_usb_substream *subs = rule->private;
- const struct snd_usb_endpoint *ep;
- struct snd_mask *fmt = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT);
-
- ep = get_sync_ep_from_substream(subs);
- if (!ep)
- return 0;
-
- hwc_debug("applying %s\n", __func__);
- return apply_hw_params_format_bits(fmt, pcm_format_to_bits(ep->cur_format));
-}
-
-static int hw_rule_rate_implicit_fb(struct snd_pcm_hw_params *params,
- struct snd_pcm_hw_rule *rule)
-{
- struct snd_usb_substream *subs = rule->private;
- const struct snd_usb_endpoint *ep;
- struct snd_interval *it;
-
- ep = get_sync_ep_from_substream(subs);
- if (!ep)
- return 0;
-
- hwc_debug("applying %s\n", __func__);
- it = hw_param_interval(params, SNDRV_PCM_HW_PARAM_RATE);
- return apply_hw_params_minmax(it, ep->cur_rate, ep->cur_rate);
-}
-
static int hw_rule_period_size_implicit_fb(struct snd_pcm_hw_params *params,
struct snd_pcm_hw_rule *rule)
{
struct snd_usb_substream *subs = rule->private;
+ struct snd_usb_audio *chip = subs->stream->chip;
+ const struct audioformat *fp;
const struct snd_usb_endpoint *ep;
struct snd_interval *it;
+ unsigned int rmin, rmax;
- ep = get_sync_ep_from_substream(subs);
- if (!ep)
- return 0;
-
- hwc_debug("applying %s\n", __func__);
it = hw_param_interval(params, SNDRV_PCM_HW_PARAM_PERIOD_SIZE);
- return apply_hw_params_minmax(it, ep->cur_period_frames,
- ep->cur_period_frames);
+ hwc_debug("hw_rule_period_size: (%u,%u)\n", it->min, it->max);
+ rmin = UINT_MAX;
+ rmax = 0;
+ list_for_each_entry(fp, &subs->fmt_list, list) {
+ if (!hw_check_valid_format(subs, params, fp))
+ continue;
+ ep = get_endpoint_in_use(chip, fp->endpoint,
+ subs->data_endpoint);
+ if (ep) {
+ hwc_debug("period size limit %d for ep#%x\n",
+ ep->cur_period_frames, fp->endpoint);
+ rmin = min(rmin, ep->cur_period_frames);
+ rmax = max(rmax, ep->cur_period_frames);
+ continue;
+ }
+
+ if (fp->implicit_fb) {
+ ep = get_endpoint_in_use(chip, fp->sync_ep,
+ subs->sync_endpoint);
+ if (ep) {
+ hwc_debug("period size limit %d for sync_ep#%x\n",
+ ep->cur_period_frames, fp->sync_ep);
+ rmin = min(rmin, ep->cur_period_frames);
+ rmax = max(rmax, ep->cur_period_frames);
+ continue;
+ }
+ }
+ }
+
+ if (!rmax)
+ return 0; /* no limit by implicit fb */
+ return apply_hw_params_minmax(it, rmin, rmax);
}
static int hw_rule_periods_implicit_fb(struct snd_pcm_hw_params *params,
struct snd_pcm_hw_rule *rule)
{
struct snd_usb_substream *subs = rule->private;
+ struct snd_usb_audio *chip = subs->stream->chip;
+ const struct audioformat *fp;
const struct snd_usb_endpoint *ep;
struct snd_interval *it;
+ unsigned int rmin, rmax;
- ep = get_sync_ep_from_substream(subs);
- if (!ep)
- return 0;
-
- hwc_debug("applying %s\n", __func__);
it = hw_param_interval(params, SNDRV_PCM_HW_PARAM_PERIODS);
- return apply_hw_params_minmax(it, ep->cur_buffer_periods,
- ep->cur_buffer_periods);
+ hwc_debug("hw_rule_periods: (%u,%u)\n", it->min, it->max);
+ rmin = UINT_MAX;
+ rmax = 0;
+ list_for_each_entry(fp, &subs->fmt_list, list) {
+ if (!hw_check_valid_format(subs, params, fp))
+ continue;
+ ep = get_endpoint_in_use(chip, fp->endpoint,
+ subs->data_endpoint);
+ if (ep) {
+ hwc_debug("periods limit %d for ep#%x\n",
+ ep->cur_buffer_periods, fp->endpoint);
+ rmin = min(rmin, ep->cur_buffer_periods);
+ rmax = max(rmax, ep->cur_buffer_periods);
+ continue;
+ }
+
+ if (fp->implicit_fb) {
+ ep = get_endpoint_in_use(chip, fp->sync_ep,
+ subs->sync_endpoint);
+ if (ep) {
+ hwc_debug("periods limit %d for sync_ep#%x\n",
+ ep->cur_buffer_periods, fp->sync_ep);
+ rmin = min(rmin, ep->cur_buffer_periods);
+ rmax = max(rmax, ep->cur_buffer_periods);
+ continue;
+ }
+ }
+ }
+
+ if (!rmax)
+ return 0; /* no limit by implicit fb */
+ return apply_hw_params_minmax(it, rmin, rmax);
}
/*
@@ -1113,16 +1177,6 @@ static int setup_hw_info(struct snd_pcm_runtime *runtime, struct snd_usb_substre
return err;
/* additional hw constraints for implicit fb */
- err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_FORMAT,
- hw_rule_format_implicit_fb, subs,
- SNDRV_PCM_HW_PARAM_FORMAT, -1);
- if (err < 0)
- return err;
- err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_RATE,
- hw_rule_rate_implicit_fb, subs,
- SNDRV_PCM_HW_PARAM_RATE, -1);
- if (err < 0)
- return err;
err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_PERIOD_SIZE,
hw_rule_period_size_implicit_fb, subs,
SNDRV_PCM_HW_PARAM_PERIOD_SIZE, -1);
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 3d13fdf7590c..3ecd1ba7fd4b 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -2152,6 +2152,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
QUIRK_FLAG_GENERIC_IMPLICIT_FB),
DEVICE_FLG(0x0525, 0xa4ad, /* Hamedal C20 usb camero */
QUIRK_FLAG_IFACE_SKIP_CLOSE),
+ DEVICE_FLG(0x0ecb, 0x205c, /* JBL Quantum610 Wireless */
+ QUIRK_FLAG_FIXED_RATE),
DEVICE_FLG(0x0ecb, 0x2069, /* JBL Quantum810 Wireless */
QUIRK_FLAG_FIXED_RATE),
diff --git a/sound/usb/stream.c b/sound/usb/stream.c
index f75601ca2d52..f10f4e6d3fb8 100644
--- a/sound/usb/stream.c
+++ b/sound/usb/stream.c
@@ -1222,6 +1222,12 @@ static int __snd_usb_parse_audio_interface(struct snd_usb_audio *chip,
if (err < 0)
return err;
}
+
+ /* try to set the interface... */
+ usb_set_interface(chip->dev, iface_no, 0);
+ snd_usb_init_pitch(chip, fp);
+ snd_usb_init_sample_rate(chip, fp, fp->rate_max);
+ usb_set_interface(chip->dev, iface_no, altno);
}
return 0;
}
diff --git a/sound/xen/xen_snd_front.c b/sound/xen/xen_snd_front.c
index 4041748c12e5..b66e037710d0 100644
--- a/sound/xen/xen_snd_front.c
+++ b/sound/xen/xen_snd_front.c
@@ -311,7 +311,7 @@ static int xen_drv_probe(struct xenbus_device *xb_dev,
return xenbus_switch_state(xb_dev, XenbusStateInitialising);
}
-static int xen_drv_remove(struct xenbus_device *dev)
+static void xen_drv_remove(struct xenbus_device *dev)
{
struct xen_snd_front_info *front_info = dev_get_drvdata(&dev->dev);
int to = 100;
@@ -345,7 +345,6 @@ static int xen_drv_remove(struct xenbus_device *dev)
xen_snd_drv_fini(front_info);
xenbus_frontend_closed(dev);
- return 0;
}
static const struct xenbus_device_id xen_drv_ids[] = {
diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h
index abc418650fec..683ca3af4084 100644
--- a/tools/arch/arm64/include/asm/cputype.h
+++ b/tools/arch/arm64/include/asm/cputype.h
@@ -41,7 +41,7 @@
(((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT)
#define MIDR_CPU_MODEL(imp, partnum) \
- (((imp) << MIDR_IMPLEMENTOR_SHIFT) | \
+ ((_AT(u32, imp) << MIDR_IMPLEMENTOR_SHIFT) | \
(0xf << MIDR_ARCHITECTURE_SHIFT) | \
((partnum) << MIDR_PARTNUM_SHIFT))
@@ -80,6 +80,7 @@
#define ARM_CPU_PART_CORTEX_X1 0xD44
#define ARM_CPU_PART_CORTEX_A510 0xD46
#define ARM_CPU_PART_CORTEX_A710 0xD47
+#define ARM_CPU_PART_CORTEX_A715 0xD4D
#define ARM_CPU_PART_CORTEX_X2 0xD48
#define ARM_CPU_PART_NEOVERSE_N2 0xD49
#define ARM_CPU_PART_CORTEX_A78C 0xD4B
@@ -123,6 +124,8 @@
#define APPLE_CPU_PART_M1_FIRESTORM_PRO 0x025
#define APPLE_CPU_PART_M1_ICESTORM_MAX 0x028
#define APPLE_CPU_PART_M1_FIRESTORM_MAX 0x029
+#define APPLE_CPU_PART_M2_BLIZZARD 0x032
+#define APPLE_CPU_PART_M2_AVALANCHE 0x033
#define AMPERE_CPU_PART_AMPERE1 0xAC3
@@ -142,6 +145,7 @@
#define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1)
#define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510)
#define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710)
+#define MIDR_CORTEX_A715 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A715)
#define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2)
#define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2)
#define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C)
@@ -175,6 +179,8 @@
#define MIDR_APPLE_M1_FIRESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_PRO)
#define MIDR_APPLE_M1_ICESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_MAX)
#define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX)
+#define MIDR_APPLE_M2_BLIZZARD MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD)
+#define MIDR_APPLE_M2_AVALANCHE MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE)
#define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1)
/* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index 316917b98707..a7a857f1784d 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -43,6 +43,7 @@
#define __KVM_HAVE_VCPU_EVENTS
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+#define KVM_DIRTY_LOG_PAGE_OFFSET 64
#define KVM_REG_SIZE(id) \
(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 649e50a8f9dd..e48deab8901d 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -206,6 +206,8 @@ struct kvm_msr_list {
struct kvm_msr_filter_range {
#define KVM_MSR_FILTER_READ (1 << 0)
#define KVM_MSR_FILTER_WRITE (1 << 1)
+#define KVM_MSR_FILTER_RANGE_VALID_MASK (KVM_MSR_FILTER_READ | \
+ KVM_MSR_FILTER_WRITE)
__u32 flags;
__u32 nmsrs; /* number of msrs in bitmap */
__u32 base; /* MSR index the bitmap starts at */
@@ -214,8 +216,11 @@ struct kvm_msr_filter_range {
#define KVM_MSR_FILTER_MAX_RANGES 16
struct kvm_msr_filter {
+#ifndef __KERNEL__
#define KVM_MSR_FILTER_DEFAULT_ALLOW (0 << 0)
+#endif
#define KVM_MSR_FILTER_DEFAULT_DENY (1 << 0)
+#define KVM_MSR_FILTER_VALID_MASK (KVM_MSR_FILTER_DEFAULT_DENY)
__u32 flags;
struct kvm_msr_filter_range ranges[KVM_MSR_FILTER_MAX_RANGES];
};
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 787b857d3fb5..f610e184ce02 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -145,6 +145,10 @@ ifeq ($(feature-llvm),1)
LLVM_CONFIG_LIB_COMPONENTS := mcdisassembler all-targets
CFLAGS += $(shell $(LLVM_CONFIG) --cflags --libs $(LLVM_CONFIG_LIB_COMPONENTS))
LIBS += $(shell $(LLVM_CONFIG) --libs $(LLVM_CONFIG_LIB_COMPONENTS))
+ ifeq ($(shell $(LLVM_CONFIG) --shared-mode),static)
+ LIBS += $(shell $(LLVM_CONFIG) --system-libs $(LLVM_CONFIG_LIB_COMPONENTS))
+ LIBS += -lstdc++
+ endif
LDFLAGS += $(shell $(LLVM_CONFIG) --ldflags)
else
# Fall back on libbfd
diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c
index 6c122952c589..5dee2b98ab60 100644
--- a/tools/gpio/gpio-event-mon.c
+++ b/tools/gpio/gpio-event-mon.c
@@ -86,6 +86,7 @@ int monitor_device(const char *device_name,
gpiotools_test_bit(values.bits, i));
}
+ i = 0;
while (1) {
struct gpio_v2_line_event event;
diff --git a/tools/include/linux/build_bug.h b/tools/include/linux/build_bug.h
index cc7070c7439b..b4898ff085de 100644
--- a/tools/include/linux/build_bug.h
+++ b/tools/include/linux/build_bug.h
@@ -79,4 +79,13 @@
#define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
#endif // static_assert
+
+/*
+ * Compile time check that field has an expected offset
+ */
+#define ASSERT_STRUCT_OFFSET(type, field, expected_offset) \
+ BUILD_BUG_ON_MSG(offsetof(type, field) != (expected_offset), \
+ "Offset of " #field " in " #type " has changed.")
+
+
#endif /* _LINUX_BUILD_BUG_H */
diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h
index 5fc5b8029bff..7380093ba9e7 100644
--- a/tools/include/nolibc/arch-mips.h
+++ b/tools/include/nolibc/arch-mips.h
@@ -192,6 +192,7 @@ struct sys_stat_struct {
__asm__ (".section .text\n"
".weak __start\n"
".set nomips16\n"
+ ".set push\n"
".set noreorder\n"
".option pic0\n"
".ent __start\n"
@@ -210,6 +211,7 @@ __asm__ (".section .text\n"
"li $v0, 4001\n" // NR_exit == 4001
"syscall\n"
".end __start\n"
+ ".set pop\n"
"");
#endif // _NOLIBC_ARCH_MIPS_H
diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h
index ba04771cb3a3..a3bdd9803f8c 100644
--- a/tools/include/nolibc/arch-riscv.h
+++ b/tools/include/nolibc/arch-riscv.h
@@ -11,13 +11,13 @@
#define O_RDONLY 0
#define O_WRONLY 1
#define O_RDWR 2
-#define O_CREAT 0x100
-#define O_EXCL 0x200
-#define O_NOCTTY 0x400
-#define O_TRUNC 0x1000
-#define O_APPEND 0x2000
-#define O_NONBLOCK 0x4000
-#define O_DIRECTORY 0x200000
+#define O_CREAT 0x40
+#define O_EXCL 0x80
+#define O_NOCTTY 0x100
+#define O_TRUNC 0x200
+#define O_APPEND 0x400
+#define O_NONBLOCK 0x800
+#define O_DIRECTORY 0x10000
struct sys_stat_struct {
unsigned long st_dev; /* Device. */
diff --git a/tools/include/nolibc/ctype.h b/tools/include/nolibc/ctype.h
index e3000b2992d7..6f90706d0644 100644
--- a/tools/include/nolibc/ctype.h
+++ b/tools/include/nolibc/ctype.h
@@ -96,4 +96,7 @@ int ispunct(int c)
return isgraph(c) && !isalnum(c);
}
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#endif /* _NOLIBC_CTYPE_H */
diff --git a/tools/include/nolibc/errno.h b/tools/include/nolibc/errno.h
index 06893d6dfb7a..9dc4919c769b 100644
--- a/tools/include/nolibc/errno.h
+++ b/tools/include/nolibc/errno.h
@@ -24,4 +24,7 @@ static int errno;
*/
#define MAX_ERRNO 4095
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#endif /* _NOLIBC_ERRNO_H */
diff --git a/tools/include/nolibc/signal.h b/tools/include/nolibc/signal.h
index ef47e71e2be3..137552216e46 100644
--- a/tools/include/nolibc/signal.h
+++ b/tools/include/nolibc/signal.h
@@ -19,4 +19,7 @@ int raise(int signal)
return sys_kill(sys_getpid(), signal);
}
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#endif /* _NOLIBC_SIGNAL_H */
diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h
index a3cebc4bc3ac..96ac8afc5aee 100644
--- a/tools/include/nolibc/stdio.h
+++ b/tools/include/nolibc/stdio.h
@@ -303,4 +303,7 @@ void perror(const char *msg)
fprintf(stderr, "%s%serrno=%d\n", (msg && *msg) ? msg : "", (msg && *msg) ? ": " : "", errno);
}
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#endif /* _NOLIBC_STDIO_H */
diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h
index 92378c4b9660..a24000d1e822 100644
--- a/tools/include/nolibc/stdlib.h
+++ b/tools/include/nolibc/stdlib.h
@@ -419,4 +419,7 @@ char *u64toa(uint64_t in)
return itoa_buffer;
}
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#endif /* _NOLIBC_STDLIB_H */
diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h
index ad97c0d522b8..fffdaf6ff467 100644
--- a/tools/include/nolibc/string.h
+++ b/tools/include/nolibc/string.h
@@ -88,8 +88,11 @@ void *memset(void *dst, int b, size_t len)
{
char *p = dst;
- while (len--)
+ while (len--) {
+ /* prevent gcc from recognizing memset() here */
+ asm volatile("");
*(p++) = b;
+ }
return dst;
}
@@ -285,4 +288,7 @@ char *strrchr(const char *s, int c)
return (char *)ret;
}
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#endif /* _NOLIBC_STRING_H */
diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
index ce3ee03aa679..78473d34e27c 100644
--- a/tools/include/nolibc/sys.h
+++ b/tools/include/nolibc/sys.h
@@ -1243,5 +1243,7 @@ ssize_t write(int fd, const void *buf, size_t count)
return ret;
}
+/* make sure to include all global symbols */
+#include "nolibc.h"
#endif /* _NOLIBC_SYS_H */
diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h
index d18b7661fdd7..84655361b9ad 100644
--- a/tools/include/nolibc/time.h
+++ b/tools/include/nolibc/time.h
@@ -25,4 +25,7 @@ time_t time(time_t *tptr)
return tv.tv_sec;
}
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#endif /* _NOLIBC_TIME_H */
diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h
index 959997034e55..fbbc0e68c001 100644
--- a/tools/include/nolibc/types.h
+++ b/tools/include/nolibc/types.h
@@ -26,13 +26,13 @@
#define S_IFSOCK 0140000
#define S_IFMT 0170000
-#define S_ISDIR(mode) (((mode) & S_IFDIR) == S_IFDIR)
-#define S_ISCHR(mode) (((mode) & S_IFCHR) == S_IFCHR)
-#define S_ISBLK(mode) (((mode) & S_IFBLK) == S_IFBLK)
-#define S_ISREG(mode) (((mode) & S_IFREG) == S_IFREG)
-#define S_ISFIFO(mode) (((mode) & S_IFIFO) == S_IFIFO)
-#define S_ISLNK(mode) (((mode) & S_IFLNK) == S_IFLNK)
-#define S_ISSOCK(mode) (((mode) & S_IFSOCK) == S_IFSOCK)
+#define S_ISDIR(mode) (((mode) & S_IFMT) == S_IFDIR)
+#define S_ISCHR(mode) (((mode) & S_IFMT) == S_IFCHR)
+#define S_ISBLK(mode) (((mode) & S_IFMT) == S_IFBLK)
+#define S_ISREG(mode) (((mode) & S_IFMT) == S_IFREG)
+#define S_ISFIFO(mode) (((mode) & S_IFMT) == S_IFIFO)
+#define S_ISLNK(mode) (((mode) & S_IFMT) == S_IFLNK)
+#define S_ISSOCK(mode) (((mode) & S_IFMT) == S_IFSOCK)
/* dirent types */
#define DT_UNKNOWN 0x0
@@ -89,39 +89,46 @@
#define EXIT_SUCCESS 0
#define EXIT_FAILURE 1
+#define FD_SETIDXMASK (8 * sizeof(unsigned long))
+#define FD_SETBITMASK (8 * sizeof(unsigned long)-1)
+
/* for select() */
typedef struct {
- uint32_t fd32[(FD_SETSIZE + 31) / 32];
+ unsigned long fds[(FD_SETSIZE + FD_SETBITMASK) / FD_SETIDXMASK];
} fd_set;
-#define FD_CLR(fd, set) do { \
- fd_set *__set = (set); \
- int __fd = (fd); \
- if (__fd >= 0) \
- __set->fd32[__fd / 32] &= ~(1U << (__fd & 31)); \
+#define FD_CLR(fd, set) do { \
+ fd_set *__set = (set); \
+ int __fd = (fd); \
+ if (__fd >= 0) \
+ __set->fds[__fd / FD_SETIDXMASK] &= \
+ ~(1U << (__fd & FX_SETBITMASK)); \
} while (0)
-#define FD_SET(fd, set) do { \
- fd_set *__set = (set); \
- int __fd = (fd); \
- if (__fd >= 0) \
- __set->fd32[__fd / 32] |= 1U << (__fd & 31); \
+#define FD_SET(fd, set) do { \
+ fd_set *__set = (set); \
+ int __fd = (fd); \
+ if (__fd >= 0) \
+ __set->fds[__fd / FD_SETIDXMASK] |= \
+ 1 << (__fd & FD_SETBITMASK); \
} while (0)
-#define FD_ISSET(fd, set) ({ \
- fd_set *__set = (set); \
- int __fd = (fd); \
- int __r = 0; \
- if (__fd >= 0) \
- __r = !!(__set->fd32[__fd / 32] & 1U << (__fd & 31)); \
- __r; \
+#define FD_ISSET(fd, set) ({ \
+ fd_set *__set = (set); \
+ int __fd = (fd); \
+ int __r = 0; \
+ if (__fd >= 0) \
+ __r = !!(__set->fds[__fd / FD_SETIDXMASK] & \
+1U << (__fd & FD_SET_BITMASK)); \
+ __r; \
})
-#define FD_ZERO(set) do { \
- fd_set *__set = (set); \
- int __idx; \
- for (__idx = 0; __idx < (FD_SETSIZE+31) / 32; __idx ++) \
- __set->fd32[__idx] = 0; \
+#define FD_ZERO(set) do { \
+ fd_set *__set = (set); \
+ int __idx; \
+ int __size = (FD_SETSIZE+FD_SETBITMASK) / FD_SETIDXMASK;\
+ for (__idx = 0; __idx < __size; __idx++) \
+ __set->fds[__idx] = 0; \
} while (0)
/* for poll() */
@@ -202,4 +209,7 @@ struct stat {
})
#endif
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#endif /* _NOLIBC_TYPES_H */
diff --git a/tools/include/nolibc/unistd.h b/tools/include/nolibc/unistd.h
index 1c25e20ee360..1cfcd52106a4 100644
--- a/tools/include/nolibc/unistd.h
+++ b/tools/include/nolibc/unistd.h
@@ -51,4 +51,7 @@ int tcsetpgrp(int fd, pid_t pid)
return ioctl(fd, TIOCSPGRP, &pid);
}
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#endif /* _NOLIBC_UNISTD_H */
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 20522d4ba1e0..55155e262646 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -1767,6 +1767,7 @@ struct kvm_xen_hvm_attr {
__u8 runstate_update_flag;
struct {
__u64 gfn;
+#define KVM_XEN_INVALID_GFN ((__u64)-1)
} shared_info;
struct {
__u32 send_port;
@@ -1798,6 +1799,7 @@ struct kvm_xen_hvm_attr {
} u;
};
+
/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0
#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1
@@ -1823,6 +1825,7 @@ struct kvm_xen_vcpu_attr {
__u16 pad[3];
union {
__u64 gpa;
+#define KVM_XEN_INVALID_GPA ((__u64)-1)
__u64 pad[8];
struct {
__u64 state;
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 4350be739f4f..4b7c8b33069e 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -427,6 +427,15 @@ static int decode_instructions(struct objtool_file *file)
if (func->type != STT_NOTYPE && func->type != STT_FUNC)
continue;
+ if (func->offset == sec->sh.sh_size) {
+ /* Heuristic: likely an "end" symbol */
+ if (func->type == STT_NOTYPE)
+ continue;
+ WARN("%s(): STT_FUNC at end of section",
+ func->name);
+ return -1;
+ }
+
if (func->return_thunk || func->alias != func)
continue;
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
index 6e7b88917ca0..ba5d942e4c6a 100644
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile
@@ -267,7 +267,7 @@ $(OUTPUT)%.xml : %.txt
$(ASCIIDOC) -b docbook -d manpage \
$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) \
-aperf_date=$(shell git log -1 --pretty="format:%cd" \
- --date=short $<) \
+ --date=short --no-show-signature $<) \
-o $@+ $< && \
mv $@+ $@
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index c2504c39bdcb..5b8784675903 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -589,6 +589,8 @@ ifndef NO_LIBELF
$(call feature_check,libbpf-bpf_program__set_insns)
ifeq ($(feature-libbpf-bpf_program__set_insns), 1)
CFLAGS += -DHAVE_LIBBPF_BPF_PROGRAM__SET_INSNS
+ else
+ dummy := $(error Error: libbpf devel library needs to be >= 0.8.0 to build with LIBBPF_DYNAMIC, update or build statically with the version that comes with the kernel sources);
endif
$(call feature_check,libbpf-btf__raw_data)
ifeq ($(feature-libbpf-btf__raw_data), 1)
@@ -602,6 +604,8 @@ ifndef NO_LIBELF
dummy := $(error Error: No libbpf devel library found, please install libbpf-devel);
endif
else
+ # Libbpf will be built as a static library from tools/lib/bpf.
+ LIBBPF_STATIC := 1
CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID
CFLAGS += -DHAVE_LIBBPF_BPF_PROG_LOAD
CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_PROGRAM
@@ -1314,14 +1318,6 @@ tip_instdir_SQ = $(subst ','\'',$(tip_instdir))
export perfexec_instdir_SQ
-# If we install to $(HOME) we keep the traceevent default:
-# $(HOME)/.traceevent/plugins
-# Otherwise we install plugins into the global $(libdir).
-ifdef DESTDIR
-plugindir=$(libdir)/traceevent/plugins
-plugindir_SQ= $(subst ','\'',$(plugindir))
-endif
-
print_var = $(eval $(print_var_code)) $(info $(MSG))
define print_var_code
MSG = $(shell printf '...%40s: %s' $(1) $($(1)))
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 13e7d26e77f0..b7d9c4206230 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -303,10 +303,12 @@ ifneq ($(OUTPUT),)
else
LIBBPF_OUTPUT = $(CURDIR)/libbpf
endif
-LIBBPF_DESTDIR = $(LIBBPF_OUTPUT)
-LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include
-LIBBPF = $(LIBBPF_OUTPUT)/libbpf.a
-CFLAGS += -I$(LIBBPF_OUTPUT)/include
+ifdef LIBBPF_STATIC
+ LIBBPF_DESTDIR = $(LIBBPF_OUTPUT)
+ LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include
+ LIBBPF = $(LIBBPF_OUTPUT)/libbpf.a
+ CFLAGS += -I$(LIBBPF_OUTPUT)/include
+endif
ifneq ($(OUTPUT),)
LIBSUBCMD_OUTPUT = $(abspath $(OUTPUT))/libsubcmd
@@ -393,10 +395,8 @@ endif
export PERL_PATH
PERFLIBS = $(LIBAPI) $(LIBPERF) $(LIBSUBCMD) $(LIBSYMBOL)
-ifndef NO_LIBBPF
- ifndef LIBBPF_DYNAMIC
- PERFLIBS += $(LIBBPF)
- endif
+ifdef LIBBPF_STATIC
+ PERFLIBS += $(LIBBPF)
endif
# We choose to avoid "if .. else if .. else .. endif endif"
@@ -756,12 +756,15 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc
$(arch_errno_name_array) \
$(sync_file_range_arrays) \
$(LIBAPI) \
- $(LIBBPF) \
$(LIBPERF) \
$(LIBSUBCMD) \
$(LIBSYMBOL) \
bpf-skel
+ifdef LIBBPF_STATIC
+prepare: $(LIBBPF)
+endif
+
$(OUTPUT)%.o: %.c prepare FORCE
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
@@ -819,7 +822,7 @@ $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h)
$(LIBAPI): FORCE | $(LIBAPI_OUTPUT)
$(Q)$(MAKE) -C $(LIBAPI_DIR) O=$(LIBAPI_OUTPUT) \
- DESTDIR=$(LIBAPI_DESTDIR) prefix= \
+ DESTDIR=$(LIBAPI_DESTDIR) prefix= subdir= \
$@ install_headers
$(LIBAPI)-clean:
@@ -828,7 +831,7 @@ $(LIBAPI)-clean:
$(LIBBPF): FORCE | $(LIBBPF_OUTPUT)
$(Q)$(MAKE) -C $(LIBBPF_DIR) FEATURES_DUMP=$(FEATURE_DUMP_EXPORT) \
- O= OUTPUT=$(LIBBPF_OUTPUT)/ DESTDIR=$(LIBBPF_DESTDIR) prefix= \
+ O= OUTPUT=$(LIBBPF_OUTPUT)/ DESTDIR=$(LIBBPF_DESTDIR) prefix= subdir= \
$@ install_headers
$(LIBBPF)-clean:
@@ -837,7 +840,7 @@ $(LIBBPF)-clean:
$(LIBPERF): FORCE | $(LIBPERF_OUTPUT)
$(Q)$(MAKE) -C $(LIBPERF_DIR) O=$(LIBPERF_OUTPUT) \
- DESTDIR=$(LIBPERF_DESTDIR) prefix= \
+ DESTDIR=$(LIBPERF_DESTDIR) prefix= subdir= \
$@ install_headers
$(LIBPERF)-clean:
@@ -846,7 +849,7 @@ $(LIBPERF)-clean:
$(LIBSUBCMD): FORCE | $(LIBSUBCMD_OUTPUT)
$(Q)$(MAKE) -C $(LIBSUBCMD_DIR) O=$(LIBSUBCMD_OUTPUT) \
- DESTDIR=$(LIBSUBCMD_DESTDIR) prefix= \
+ DESTDIR=$(LIBSUBCMD_DESTDIR) prefix= subdir= \
$@ install_headers
$(LIBSUBCMD)-clean:
@@ -855,7 +858,7 @@ $(LIBSUBCMD)-clean:
$(LIBSYMBOL): FORCE | $(LIBSYMBOL_OUTPUT)
$(Q)$(MAKE) -C $(LIBSYMBOL_DIR) O=$(LIBSYMBOL_OUTPUT) \
- DESTDIR=$(LIBSYMBOL_DESTDIR) prefix= \
+ DESTDIR=$(LIBSYMBOL_DESTDIR) prefix= subdir= \
$@ install_headers
$(LIBSYMBOL)-clean:
diff --git a/tools/perf/arch/riscv/util/unwind-libdw.c b/tools/perf/arch/riscv/util/unwind-libdw.c
index 19536e172850..54a198714eb8 100644
--- a/tools/perf/arch/riscv/util/unwind-libdw.c
+++ b/tools/perf/arch/riscv/util/unwind-libdw.c
@@ -4,7 +4,7 @@
#include <elfutils/libdwfl.h>
#include "../../util/unwind-libdw.h"
#include "../../util/perf_regs.h"
-#include "../../util/event.h"
+#include "../../util/sample.h"
bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
{
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index e20656c431a4..8ae0a1535293 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -26,6 +26,7 @@
#include "util/string2.h"
#include <linux/kernel.h>
+#include <linux/numa.h>
#include <linux/rbtree.h>
#include <linux/string.h>
#include <linux/zalloc.h>
@@ -185,22 +186,33 @@ static int evsel__process_alloc_event(struct evsel *evsel, struct perf_sample *s
total_allocated += bytes_alloc;
nr_allocs++;
- return 0;
-}
-static int evsel__process_alloc_node_event(struct evsel *evsel, struct perf_sample *sample)
-{
- int ret = evsel__process_alloc_event(evsel, sample);
+ /*
+ * Commit 11e9734bcb6a ("mm/slab_common: unify NUMA and UMA
+ * version of tracepoints") adds the field "node" into the
+ * tracepoints 'kmalloc' and 'kmem_cache_alloc'.
+ *
+ * The legacy tracepoints 'kmalloc_node' and 'kmem_cache_alloc_node'
+ * also contain the field "node".
+ *
+ * If the tracepoint contains the field "node" the tool stats the
+ * cross allocation.
+ */
+ if (evsel__field(evsel, "node")) {
+ int node1, node2;
- if (!ret) {
- int node1 = cpu__get_node((struct perf_cpu){.cpu = sample->cpu}),
- node2 = evsel__intval(evsel, sample, "node");
+ node1 = cpu__get_node((struct perf_cpu){.cpu = sample->cpu});
+ node2 = evsel__intval(evsel, sample, "node");
- if (node1 != node2)
+ /*
+ * If the field "node" is NUMA_NO_NODE (-1), we don't take it
+ * as a cross allocation.
+ */
+ if ((node2 != NUMA_NO_NODE) && (node1 != node2))
nr_cross_allocs++;
}
- return ret;
+ return 0;
}
static int ptr_cmp(void *, void *);
@@ -1369,8 +1381,8 @@ static int __cmd_kmem(struct perf_session *session)
/* slab allocator */
{ "kmem:kmalloc", evsel__process_alloc_event, },
{ "kmem:kmem_cache_alloc", evsel__process_alloc_event, },
- { "kmem:kmalloc_node", evsel__process_alloc_node_event, },
- { "kmem:kmem_cache_alloc_node", evsel__process_alloc_node_event, },
+ { "kmem:kmalloc_node", evsel__process_alloc_event, },
+ { "kmem:kmem_cache_alloc_node", evsel__process_alloc_event, },
{ "kmem:kfree", evsel__process_free_event, },
{ "kmem:kmem_cache_free", evsel__process_free_event, },
/* page allocator */
@@ -1824,6 +1836,19 @@ static int parse_line_opt(const struct option *opt __maybe_unused,
return 0;
}
+static bool slab_legacy_tp_is_exposed(void)
+{
+ /*
+ * The tracepoints "kmem:kmalloc_node" and
+ * "kmem:kmem_cache_alloc_node" have been removed on the latest
+ * kernel, if the tracepoint "kmem:kmalloc_node" is existed it
+ * means the tool is running on an old kernel, we need to
+ * rollback to support these legacy tracepoints.
+ */
+ return IS_ERR(trace_event__tp_format("kmem", "kmalloc_node")) ?
+ false : true;
+}
+
static int __cmd_record(int argc, const char **argv)
{
const char * const record_args[] = {
@@ -1831,22 +1856,28 @@ static int __cmd_record(int argc, const char **argv)
};
const char * const slab_events[] = {
"-e", "kmem:kmalloc",
- "-e", "kmem:kmalloc_node",
"-e", "kmem:kfree",
"-e", "kmem:kmem_cache_alloc",
- "-e", "kmem:kmem_cache_alloc_node",
"-e", "kmem:kmem_cache_free",
};
+ const char * const slab_legacy_events[] = {
+ "-e", "kmem:kmalloc_node",
+ "-e", "kmem:kmem_cache_alloc_node",
+ };
const char * const page_events[] = {
"-e", "kmem:mm_page_alloc",
"-e", "kmem:mm_page_free",
};
unsigned int rec_argc, i, j;
const char **rec_argv;
+ unsigned int slab_legacy_tp_exposed = slab_legacy_tp_is_exposed();
rec_argc = ARRAY_SIZE(record_args) + argc - 1;
- if (kmem_slab)
+ if (kmem_slab) {
rec_argc += ARRAY_SIZE(slab_events);
+ if (slab_legacy_tp_exposed)
+ rec_argc += ARRAY_SIZE(slab_legacy_events);
+ }
if (kmem_page)
rec_argc += ARRAY_SIZE(page_events) + 1; /* for -g */
@@ -1861,6 +1892,10 @@ static int __cmd_record(int argc, const char **argv)
if (kmem_slab) {
for (j = 0; j < ARRAY_SIZE(slab_events); j++, i++)
rec_argv[i] = strdup(slab_events[j]);
+ if (slab_legacy_tp_exposed) {
+ for (j = 0; j < ARRAY_SIZE(slab_legacy_events); j++, i++)
+ rec_argv[i] = strdup(slab_legacy_events[j]);
+ }
}
if (kmem_page) {
rec_argv[i++] = strdup("-g");
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 718b82bfcdff..506c2fe42d52 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -1670,6 +1670,7 @@ static int __cmd_report(bool display_info)
/* for lock function check */
symbol_conf.sort_by_name = true;
+ symbol_conf.allow_aliases = true;
symbol__init(&session->header.env);
if (!data.is_pipe) {
@@ -1757,6 +1758,7 @@ static int __cmd_contention(int argc, const char **argv)
/* for lock function check */
symbol_conf.sort_by_name = true;
+ symbol_conf.allow_aliases = true;
symbol__init(&session->header.env);
if (use_bpf) {
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 86e06f136f40..d21fe0f32a6d 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -16,7 +16,9 @@
#include "util/record.h"
#include <api/fs/tracing_path.h>
+#ifdef HAVE_LIBBPF_SUPPORT
#include <bpf/bpf.h>
+#endif
#include "util/bpf_map.h"
#include "util/rlimit.h"
#include "builtin.h"
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
index 8fcab5ad00c5..e8d2762adade 100644
--- a/tools/perf/command-list.txt
+++ b/tools/perf/command-list.txt
@@ -16,20 +16,20 @@ perf-ftrace mainporcelain common
perf-inject mainporcelain common
perf-iostat mainporcelain common
perf-kallsyms mainporcelain common
-perf-kmem mainporcelain common
+perf-kmem mainporcelain traceevent
perf-kvm mainporcelain common
-perf-kwork mainporcelain common
+perf-kwork mainporcelain traceevent
perf-list mainporcelain common
-perf-lock mainporcelain common
+perf-lock mainporcelain traceevent
perf-mem mainporcelain common
perf-probe mainporcelain full
perf-record mainporcelain common
perf-report mainporcelain common
-perf-sched mainporcelain common
+perf-sched mainporcelain traceevent
perf-script mainporcelain common
perf-stat mainporcelain common
perf-test mainporcelain common
-perf-timechart mainporcelain common
+perf-timechart mainporcelain traceevent
perf-top mainporcelain common
perf-trace mainporcelain audit
perf-version mainporcelain common
diff --git a/tools/perf/tests/bpf-script-test-prologue.c b/tools/perf/tests/bpf-script-test-prologue.c
index bd83d364cf30..91778b5c6125 100644
--- a/tools/perf/tests/bpf-script-test-prologue.c
+++ b/tools/perf/tests/bpf-script-test-prologue.c
@@ -20,6 +20,8 @@
# undef if
#endif
+typedef unsigned int __bitwise fmode_t;
+
#define FMODE_READ 0x1
#define FMODE_WRITE 0x2
diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index 05e818a8bbad..009d6efb673c 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -222,19 +222,7 @@ installed_files_bin := bin/perf
installed_files_bin += etc/bash_completion.d/perf
installed_files_bin += libexec/perf-core/perf-archive
-installed_files_plugins := $(lib)/traceevent/plugins/plugin_cfg80211.so
-installed_files_plugins += $(lib)/traceevent/plugins/plugin_scsi.so
-installed_files_plugins += $(lib)/traceevent/plugins/plugin_xen.so
-installed_files_plugins += $(lib)/traceevent/plugins/plugin_function.so
-installed_files_plugins += $(lib)/traceevent/plugins/plugin_sched_switch.so
-installed_files_plugins += $(lib)/traceevent/plugins/plugin_mac80211.so
-installed_files_plugins += $(lib)/traceevent/plugins/plugin_kvm.so
-installed_files_plugins += $(lib)/traceevent/plugins/plugin_kmem.so
-installed_files_plugins += $(lib)/traceevent/plugins/plugin_hrtimer.so
-installed_files_plugins += $(lib)/traceevent/plugins/plugin_jbd2.so
-
installed_files_all := $(installed_files_bin)
-installed_files_all += $(installed_files_plugins)
test_make_install := $(call test_dest_files,$(installed_files_all))
test_make_install_O := $(call test_dest_files,$(installed_files_all))
diff --git a/tools/perf/tests/shell/buildid.sh b/tools/perf/tests/shell/buildid.sh
index f05670d1e39e..aaf851108ca3 100755
--- a/tools/perf/tests/shell/buildid.sh
+++ b/tools/perf/tests/shell/buildid.sh
@@ -77,7 +77,20 @@ check()
file=${build_id_dir}/.build-id/${id:0:2}/`readlink ${link}`/elf
echo "file: ${file}"
- if [ ! -x $file ]; then
+ # Check for file permission of original file
+ # in case of pe-file.exe file
+ echo $1 | grep ".exe"
+ if [ $? -eq 0 ]; then
+ if [ -x $1 -a ! -x $file ]; then
+ echo "failed: file ${file} executable does not exist"
+ exit 1
+ fi
+
+ if [ ! -x $file -a ! -e $file ]; then
+ echo "failed: file ${file} does not exist"
+ exit 1
+ fi
+ elif [ ! -x $file ]; then
echo "failed: file ${file} does not exist"
exit 1
fi
diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
index 34c400ccbe04..57e7a6a470c9 100755
--- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
+++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
@@ -37,6 +37,7 @@ trace_libc_inet_pton_backtrace() {
case "$(uname -m)" in
s390x)
eventattr='call-graph=dwarf,max-stack=4'
+ echo "text_to_binary_address.*\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected
echo "gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected
echo "(__GI_)?getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected
echo "main\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$" >> $expected
@@ -57,7 +58,7 @@ trace_libc_inet_pton_backtrace() {
perf_data=`mktemp -u /tmp/perf.data.XXX`
perf_script=`mktemp -u /tmp/perf.script.XXX`
perf record -e $event_name/$eventattr/ -o $perf_data ping -6 -c 1 ::1 > /dev/null 2>&1
- perf script -i $perf_data > $perf_script
+ perf script -i $perf_data | tac | grep -m1 ^ping -B9 | tac > $perf_script
exec 3<$perf_script
exec 4<$expected
diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h
index de3701a2a212..13c3a237b9c9 100644
--- a/tools/perf/trace/beauty/include/linux/socket.h
+++ b/tools/perf/trace/beauty/include/linux/socket.h
@@ -33,7 +33,10 @@ typedef __kernel_sa_family_t sa_family_t;
struct sockaddr {
sa_family_t sa_family; /* address family, AF_xxx */
- char sa_data[14]; /* 14 bytes of protocol address */
+ union {
+ char sa_data_min[14]; /* Minimum 14 bytes of protocol address */
+ DECLARE_FLEX_ARRAY(char, sa_data);
+ };
};
struct linger {
diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN
index 3cc42821d9b3..d7dc7c28508c 100755
--- a/tools/perf/util/PERF-VERSION-GEN
+++ b/tools/perf/util/PERF-VERSION-GEN
@@ -19,7 +19,7 @@ TAG=
if test -d ../../.git -o -f ../../.git
then
TAG=$(MAKEFLAGS= make -sC ../.. kernelversion)
- CID=$(git log -1 --abbrev=12 --pretty=format:"%h" 2>/dev/null) && CID="-g$CID"
+ CID=$(git log -1 --abbrev=12 --pretty=format:"%h" --no-show-signature 2>/dev/null) && CID="-g$CID"
elif test -f ../../PERF-VERSION-FILE
then
TAG=$(cut -d' ' -f3 ../../PERF-VERSION-FILE | sed -e 's/\"//g')
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 265d20cc126b..c2e323cd7d49 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -2611,7 +2611,7 @@ static int find_dso_sym(struct dso *dso, const char *sym_name, u64 *start,
*size = sym->start - *start;
if (idx > 0) {
if (*size)
- return 1;
+ return 0;
} else if (dso_sym_match(sym, sym_name, &cnt, idx)) {
print_duplicate_syms(dso, sym_name);
return -EINVAL;
diff --git a/tools/perf/util/bpf_counter.h b/tools/perf/util/bpf_counter.h
index 4dbf26408b69..c6d21c07b14c 100644
--- a/tools/perf/util/bpf_counter.h
+++ b/tools/perf/util/bpf_counter.h
@@ -4,9 +4,12 @@
#include <linux/list.h>
#include <sys/resource.h>
+
+#ifdef HAVE_LIBBPF_SUPPORT
#include <bpf/bpf.h>
#include <bpf/btf.h>
#include <bpf/libbpf.h>
+#endif
struct evsel;
struct target;
@@ -87,6 +90,8 @@ static inline void set_max_rlimit(void)
setrlimit(RLIMIT_MEMLOCK, &rinf);
}
+#ifdef HAVE_BPF_SKEL
+
static inline __u32 bpf_link_get_id(int fd)
{
struct bpf_link_info link_info = { .id = 0, };
@@ -127,5 +132,6 @@ static inline int bperf_trigger_reading(int prog_fd, int cpu)
return bpf_prog_test_run_opts(prog_fd, &opts);
}
+#endif /* HAVE_BPF_SKEL */
#endif /* __PERF_BPF_COUNTER_H */
diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c
index 3c2df7522f6f..1c82377ed78b 100644
--- a/tools/perf/util/bpf_counter_cgroup.c
+++ b/tools/perf/util/bpf_counter_cgroup.c
@@ -116,27 +116,19 @@ static int bperf_load_program(struct evlist *evlist)
/* open single copy of the events w/o cgroup */
err = evsel__open_per_cpu(evsel, evsel->core.cpus, -1);
- if (err) {
- pr_err("Failed to open first cgroup events\n");
- goto out;
- }
+ if (err == 0)
+ evsel->supported = true;
map_fd = bpf_map__fd(skel->maps.events);
perf_cpu_map__for_each_cpu(cpu, j, evsel->core.cpus) {
int fd = FD(evsel, j);
__u32 idx = evsel->core.idx * total_cpus + cpu.cpu;
- err = bpf_map_update_elem(map_fd, &idx, &fd,
- BPF_ANY);
- if (err < 0) {
- pr_err("Failed to update perf_event fd\n");
- goto out;
- }
+ bpf_map_update_elem(map_fd, &idx, &fd, BPF_ANY);
}
evsel->cgrp = leader_cgrp;
}
- evsel->supported = true;
if (evsel->cgrp == cgrp)
continue;
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index a839b30c981b..ea9c083ab1e3 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -715,9 +715,13 @@ build_id_cache__add(const char *sbuild_id, const char *name, const char *realnam
} else if (nsi && nsinfo__need_setns(nsi)) {
if (copyfile_ns(name, filename, nsi))
goto out_free;
- } else if (link(realname, filename) && errno != EEXIST &&
- copyfile(name, filename))
- goto out_free;
+ } else if (link(realname, filename) && errno != EEXIST) {
+ struct stat f_stat;
+
+ if (!(stat(name, &f_stat) < 0) &&
+ copyfile_mode(name, filename, f_stat.st_mode))
+ goto out_free;
+ }
}
/* Some binaries are stripped, but have .debug files with their symbol
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index e99b41f9be45..cd978c240e0d 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -224,6 +224,19 @@ static int add_cgroup_name(const char *fpath, const struct stat *sb __maybe_unus
return 0;
}
+static int check_and_add_cgroup_name(const char *fpath)
+{
+ struct cgroup_name *cn;
+
+ list_for_each_entry(cn, &cgroup_list, list) {
+ if (!strcmp(cn->name, fpath))
+ return 0;
+ }
+
+ /* pretend if it's added by ftw() */
+ return add_cgroup_name(fpath, NULL, FTW_D, NULL);
+}
+
static void release_cgroup_list(void)
{
struct cgroup_name *cn;
@@ -242,7 +255,7 @@ static int list_cgroups(const char *str)
struct cgroup_name *cn;
char *s;
- /* use given name as is - for testing purpose */
+ /* use given name as is when no regex is given */
for (;;) {
p = strchr(str, ',');
e = p ? p : eos;
@@ -253,13 +266,13 @@ static int list_cgroups(const char *str)
s = strndup(str, e - str);
if (!s)
return -1;
- /* pretend if it's added by ftw() */
- ret = add_cgroup_name(s, NULL, FTW_D, NULL);
+
+ ret = check_and_add_cgroup_name(s);
free(s);
- if (ret)
+ if (ret < 0)
return -1;
} else {
- if (add_cgroup_name("", NULL, FTW_D, NULL) < 0)
+ if (check_and_add_cgroup_name("/") < 0)
return -1;
}
diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
index a7f68c309545..fc16299c915f 100644
--- a/tools/perf/util/data.c
+++ b/tools/perf/util/data.c
@@ -132,6 +132,7 @@ int perf_data__open_dir(struct perf_data *data)
file->size = st.st_size;
}
+ closedir(dir);
if (!files)
return -EINVAL;
@@ -140,6 +141,7 @@ int perf_data__open_dir(struct perf_data *data)
return 0;
out_err:
+ closedir(dir);
close_dir(files, nr);
return ret;
}
diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l
index 0168a9637330..d47de5f270a8 100644
--- a/tools/perf/util/expr.l
+++ b/tools/perf/util/expr.l
@@ -42,8 +42,11 @@ static char *normalize(char *str, int runtime)
char *dst = str;
while (*str) {
- if (*str == '\\')
+ if (*str == '\\') {
*dst++ = *++str;
+ if (!*str)
+ break;
+ }
else if (*str == '?') {
char *paramval;
int i = 0;
diff --git a/tools/perf/util/generate-cmdlist.sh b/tools/perf/util/generate-cmdlist.sh
index c3cef36d4176..1b5140e5ce99 100755
--- a/tools/perf/util/generate-cmdlist.sh
+++ b/tools/perf/util/generate-cmdlist.sh
@@ -38,7 +38,7 @@ do
done
echo "#endif /* HAVE_LIBELF_SUPPORT */"
-echo "#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT)"
+echo "#if defined(HAVE_LIBTRACEEVENT) && (defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT))"
sed -n -e 's/^perf-\([^ ]*\)[ ].* audit*/\1/p' command-list.txt |
sort |
while read cmd
@@ -51,5 +51,20 @@ do
p
}' "Documentation/perf-$cmd.txt"
done
-echo "#endif /* HAVE_LIBELF_SUPPORT */"
+echo "#endif /* HAVE_LIBTRACEEVENT && (HAVE_LIBAUDIT_SUPPORT || HAVE_SYSCALL_TABLE_SUPPORT) */"
+
+echo "#ifdef HAVE_LIBTRACEEVENT"
+sed -n -e 's/^perf-\([^ ]*\)[ ].* traceevent.*/\1/p' command-list.txt |
+sort |
+while read cmd
+do
+ sed -n '
+ /^NAME/,/perf-'"$cmd"'/H
+ ${
+ x
+ s/.*perf-'"$cmd"' - \(.*\)/ {"'"$cmd"'", "\1"},/
+ p
+ }' "Documentation/perf-$cmd.txt"
+done
+echo "#endif /* HAVE_LIBTRACEEVENT */"
echo "};"
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index e188f74698dd..37662cdec5ee 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -2971,6 +2971,18 @@ static int add_dynamic_entry(struct evlist *evlist, const char *tok,
ret = add_all_matching_fields(evlist, field_name, raw_trace, level);
goto out;
}
+#else
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT) {
+ pr_err("%s %s", ret ? "," : "This perf binary isn't linked with libtraceevent, can't process", evsel__name(evsel));
+ ret = -ENOTSUP;
+ }
+ }
+
+ if (ret) {
+ pr_err("\n");
+ goto out;
+ }
#endif
evsel = find_evsel(evlist, event_name);
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index add6c5d9531c..9b3cd79cca12 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -4,6 +4,7 @@
#include <stdbool.h>
#include <stdio.h>
+#include <sys/types.h>
#include <linux/types.h>
struct evlist;
diff --git a/tools/testing/memblock/.gitignore b/tools/testing/memblock/.gitignore
index 654338e0be52..4cc7cd5aac2b 100644
--- a/tools/testing/memblock/.gitignore
+++ b/tools/testing/memblock/.gitignore
@@ -1,4 +1,5 @@
main
memblock.c
linux/memblock.h
+asm/asm.h
asm/cmpxchg.h
diff --git a/tools/testing/memblock/Makefile b/tools/testing/memblock/Makefile
index 2310ac4d080e..7a1ca694a982 100644
--- a/tools/testing/memblock/Makefile
+++ b/tools/testing/memblock/Makefile
@@ -29,13 +29,14 @@ include: ../../../include/linux/memblock.h ../../include/linux/*.h \
@mkdir -p linux
test -L linux/memblock.h || ln -s ../../../../include/linux/memblock.h linux/memblock.h
+ test -L asm/asm.h || ln -s ../../../arch/x86/include/asm/asm.h asm/asm.h
test -L asm/cmpxchg.h || ln -s ../../../arch/x86/include/asm/cmpxchg.h asm/cmpxchg.h
memblock.c: $(EXTR_SRC)
test -L memblock.c || ln -s $(EXTR_SRC) memblock.c
clean:
- $(RM) $(TARGETS) $(OFILES) linux/memblock.h memblock.c asm/cmpxchg.h
+ $(RM) $(TARGETS) $(OFILES) linux/memblock.h memblock.c asm/asm.h asm/cmpxchg.h
help:
@echo 'Memblock simulator'
diff --git a/tools/testing/selftests/amd-pstate/Makefile b/tools/testing/selftests/amd-pstate/Makefile
index 5f195ee756d6..5fd1424db37d 100644
--- a/tools/testing/selftests/amd-pstate/Makefile
+++ b/tools/testing/selftests/amd-pstate/Makefile
@@ -7,11 +7,6 @@ all:
uname_M := $(shell uname -m 2>/dev/null || echo not)
ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
-ifeq (x86,$(ARCH))
-TEST_GEN_FILES += ../../../power/x86/amd_pstate_tracer/amd_pstate_trace.py
-TEST_GEN_FILES += ../../../power/x86/intel_pstate_tracer/intel_pstate_tracer.py
-endif
-
TEST_PROGS := run.sh
TEST_FILES := basic.sh tbench.sh gitsource.sh
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 07d2d0a8c5cb..401a75844cc0 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -36,6 +36,7 @@ test_cpp
*.lskel.h
/no_alu32
/bpf_gcc
+/host-tools
/tools
/runqslower
/bench
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index 585fcf73c731..3fc3e54b19aa 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -14,6 +14,7 @@ cgrp_kfunc # JIT does not support calling kernel f
cgrp_local_storage # prog_attach unexpected error: -524 (trampoline)
core_read_macros # unknown func bpf_probe_read#4 (overlapping)
d_path # failed to auto-attach program 'prog_stat': -524 (trampoline)
+decap_sanity # JIT does not support calling kernel function (kfunc)
deny_namespace # failed to attach: ERROR: strerror_r(-524)=22 (trampoline)
dummy_st_ops # test_run unexpected error: -524 (errno 524) (trampoline)
fentry_fexit # fentry attach failed: -524 (trampoline)
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 6f8ed61fc4b4..3af6450763e9 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -1465,6 +1465,77 @@ out:
bpf_iter_task_vma__destroy(skel);
}
+static void test_task_vma_dead_task(void)
+{
+ struct bpf_iter_task_vma *skel;
+ int wstatus, child_pid = -1;
+ time_t start_tm, cur_tm;
+ int err, iter_fd = -1;
+ int wait_sec = 3;
+
+ skel = bpf_iter_task_vma__open();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vma__open"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ err = bpf_iter_task_vma__load(skel);
+ if (!ASSERT_OK(err, "bpf_iter_task_vma__load"))
+ goto out;
+
+ skel->links.proc_maps = bpf_program__attach_iter(
+ skel->progs.proc_maps, NULL);
+
+ if (!ASSERT_OK_PTR(skel->links.proc_maps, "bpf_program__attach_iter")) {
+ skel->links.proc_maps = NULL;
+ goto out;
+ }
+
+ start_tm = time(NULL);
+ cur_tm = start_tm;
+
+ child_pid = fork();
+ if (child_pid == 0) {
+ /* Fork short-lived processes in the background. */
+ while (cur_tm < start_tm + wait_sec) {
+ system("echo > /dev/null");
+ cur_tm = time(NULL);
+ }
+ exit(0);
+ }
+
+ if (!ASSERT_GE(child_pid, 0, "fork_child"))
+ goto out;
+
+ while (cur_tm < start_tm + wait_sec) {
+ iter_fd = bpf_iter_create(bpf_link__fd(skel->links.proc_maps));
+ if (!ASSERT_GE(iter_fd, 0, "create_iter"))
+ goto out;
+
+ /* Drain all data from iter_fd. */
+ while (cur_tm < start_tm + wait_sec) {
+ err = read_fd_into_buffer(iter_fd, task_vma_output, CMP_BUFFER_SIZE);
+ if (!ASSERT_GE(err, 0, "read_iter_fd"))
+ goto out;
+
+ cur_tm = time(NULL);
+
+ if (err == 0)
+ break;
+ }
+
+ close(iter_fd);
+ iter_fd = -1;
+ }
+
+ check_bpf_link_info(skel->progs.proc_maps);
+
+out:
+ waitpid(child_pid, &wstatus, 0);
+ close(iter_fd);
+ bpf_iter_task_vma__destroy(skel);
+}
+
void test_bpf_sockmap_map_iter_fd(void)
{
struct bpf_iter_sockmap *skel;
@@ -1586,6 +1657,8 @@ void test_bpf_iter(void)
test_task_file();
if (test__start_subtest("task_vma"))
test_task_vma();
+ if (test__start_subtest("task_vma_dead_task"))
+ test_task_vma_dead_task();
if (test__start_subtest("task_btf"))
test_task_btf();
if (test__start_subtest("tcp4"))
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index 0ba2e8b9c6ac..e9ea38aa8248 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -801,7 +801,7 @@ static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d,
static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d,
char *str)
{
-#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
+#if 0
TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_number", int, BTF_F_COMPACT,
"int cpu_number = (int)100", 100);
#endif
diff --git a/tools/testing/selftests/bpf/prog_tests/decap_sanity.c b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c
new file mode 100644
index 000000000000..0b2f73b88c53
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <net/if.h>
+#include <linux/in6.h>
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "decap_sanity.skel.h"
+
+#define SYS(fmt, ...) \
+ ({ \
+ char cmd[1024]; \
+ snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
+ if (!ASSERT_OK(system(cmd), cmd)) \
+ goto fail; \
+ })
+
+#define NS_TEST "decap_sanity_ns"
+#define IPV6_IFACE_ADDR "face::1"
+#define UDP_TEST_PORT 7777
+
+void test_decap_sanity(void)
+{
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_hook, .attach_point = BPF_TC_EGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_attach);
+ struct nstoken *nstoken = NULL;
+ struct decap_sanity *skel;
+ struct sockaddr_in6 addr;
+ socklen_t addrlen;
+ char buf[128] = {};
+ int sockfd, err;
+
+ skel = decap_sanity__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel open_and_load"))
+ return;
+
+ SYS("ip netns add %s", NS_TEST);
+ SYS("ip -net %s -6 addr add %s/128 dev lo nodad", NS_TEST, IPV6_IFACE_ADDR);
+ SYS("ip -net %s link set dev lo up", NS_TEST);
+
+ nstoken = open_netns(NS_TEST);
+ if (!ASSERT_OK_PTR(nstoken, "open_netns"))
+ goto fail;
+
+ qdisc_hook.ifindex = if_nametoindex("lo");
+ if (!ASSERT_GT(qdisc_hook.ifindex, 0, "if_nametoindex lo"))
+ goto fail;
+
+ err = bpf_tc_hook_create(&qdisc_hook);
+ if (!ASSERT_OK(err, "create qdisc hook"))
+ goto fail;
+
+ tc_attach.prog_fd = bpf_program__fd(skel->progs.decap_sanity);
+ err = bpf_tc_attach(&qdisc_hook, &tc_attach);
+ if (!ASSERT_OK(err, "attach filter"))
+ goto fail;
+
+ addrlen = sizeof(addr);
+ err = make_sockaddr(AF_INET6, IPV6_IFACE_ADDR, UDP_TEST_PORT,
+ (void *)&addr, &addrlen);
+ if (!ASSERT_OK(err, "make_sockaddr"))
+ goto fail;
+ sockfd = socket(AF_INET6, SOCK_DGRAM, 0);
+ if (!ASSERT_NEQ(sockfd, -1, "socket"))
+ goto fail;
+ err = sendto(sockfd, buf, sizeof(buf), 0, (void *)&addr, addrlen);
+ close(sockfd);
+ if (!ASSERT_EQ(err, sizeof(buf), "send"))
+ goto fail;
+
+ ASSERT_TRUE(skel->bss->init_csum_partial, "init_csum_partial");
+ ASSERT_TRUE(skel->bss->final_csum_none, "final_csum_none");
+ ASSERT_FALSE(skel->bss->broken_csum_start, "broken_csum_start");
+
+fail:
+ if (nstoken) {
+ bpf_tc_hook_destroy(&qdisc_hook);
+ close_netns(nstoken);
+ }
+ system("ip netns del " NS_TEST " >& /dev/null");
+ decap_sanity__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/jeq_infer_not_null.c b/tools/testing/selftests/bpf/prog_tests/jeq_infer_not_null.c
new file mode 100644
index 000000000000..3add34df5767
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/jeq_infer_not_null.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "jeq_infer_not_null_fail.skel.h"
+
+void test_jeq_infer_not_null(void)
+{
+ RUN_TESTS(jeq_infer_not_null_fail);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index 2cf0c7a3fe23..567e07c19ecc 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -30,6 +30,8 @@
#define MAX_STRERR_LEN 256
#define MAX_TEST_NAME 80
+#define __always_unused __attribute__((__unused__))
+
#define _FAIL(errnum, fmt...) \
({ \
error_at_line(0, (errnum), __func__, __LINE__, fmt); \
@@ -321,7 +323,8 @@ static int socket_loopback(int family, int sotype)
return socket_loopback_reuseport(family, sotype, -1);
}
-static void test_insert_invalid(int family, int sotype, int mapfd)
+static void test_insert_invalid(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
u32 key = 0;
u64 value;
@@ -338,7 +341,8 @@ static void test_insert_invalid(int family, int sotype, int mapfd)
FAIL_ERRNO("map_update: expected EBADF");
}
-static void test_insert_opened(int family, int sotype, int mapfd)
+static void test_insert_opened(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
u32 key = 0;
u64 value;
@@ -359,7 +363,8 @@ static void test_insert_opened(int family, int sotype, int mapfd)
xclose(s);
}
-static void test_insert_bound(int family, int sotype, int mapfd)
+static void test_insert_bound(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
struct sockaddr_storage addr;
socklen_t len;
@@ -386,7 +391,8 @@ close:
xclose(s);
}
-static void test_insert(int family, int sotype, int mapfd)
+static void test_insert(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
u64 value;
u32 key;
@@ -402,7 +408,8 @@ static void test_insert(int family, int sotype, int mapfd)
xclose(s);
}
-static void test_delete_after_insert(int family, int sotype, int mapfd)
+static void test_delete_after_insert(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
u64 value;
u32 key;
@@ -419,7 +426,8 @@ static void test_delete_after_insert(int family, int sotype, int mapfd)
xclose(s);
}
-static void test_delete_after_close(int family, int sotype, int mapfd)
+static void test_delete_after_close(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
int err, s;
u64 value;
@@ -442,7 +450,8 @@ static void test_delete_after_close(int family, int sotype, int mapfd)
FAIL_ERRNO("map_delete: expected EINVAL/EINVAL");
}
-static void test_lookup_after_insert(int family, int sotype, int mapfd)
+static void test_lookup_after_insert(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
u64 cookie, value;
socklen_t len;
@@ -470,7 +479,8 @@ static void test_lookup_after_insert(int family, int sotype, int mapfd)
xclose(s);
}
-static void test_lookup_after_delete(int family, int sotype, int mapfd)
+static void test_lookup_after_delete(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
int err, s;
u64 value;
@@ -493,7 +503,8 @@ static void test_lookup_after_delete(int family, int sotype, int mapfd)
xclose(s);
}
-static void test_lookup_32_bit_value(int family, int sotype, int mapfd)
+static void test_lookup_32_bit_value(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
u32 key, value32;
int err, s;
@@ -523,7 +534,8 @@ close:
xclose(s);
}
-static void test_update_existing(int family, int sotype, int mapfd)
+static void test_update_existing(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
int s1, s2;
u64 value;
@@ -551,7 +563,7 @@ close_s1:
/* Exercise the code path where we destroy child sockets that never
* got accept()'ed, aka orphans, when parent socket gets closed.
*/
-static void test_destroy_orphan_child(int family, int sotype, int mapfd)
+static void do_destroy_orphan_child(int family, int sotype, int mapfd)
{
struct sockaddr_storage addr;
socklen_t len;
@@ -582,10 +594,38 @@ close_srv:
xclose(s);
}
+static void test_destroy_orphan_child(struct test_sockmap_listen *skel,
+ int family, int sotype, int mapfd)
+{
+ int msg_verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
+ int skb_verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ const struct test {
+ int progfd;
+ enum bpf_attach_type atype;
+ } tests[] = {
+ { -1, -1 },
+ { msg_verdict, BPF_SK_MSG_VERDICT },
+ { skb_verdict, BPF_SK_SKB_VERDICT },
+ };
+ const struct test *t;
+
+ for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+ if (t->progfd != -1 &&
+ xbpf_prog_attach(t->progfd, mapfd, t->atype, 0) != 0)
+ return;
+
+ do_destroy_orphan_child(family, sotype, mapfd);
+
+ if (t->progfd != -1)
+ xbpf_prog_detach2(t->progfd, mapfd, t->atype);
+ }
+}
+
/* Perform a passive open after removing listening socket from SOCKMAP
* to ensure that callbacks get restored properly.
*/
-static void test_clone_after_delete(int family, int sotype, int mapfd)
+static void test_clone_after_delete(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
struct sockaddr_storage addr;
socklen_t len;
@@ -621,7 +661,8 @@ close_srv:
* SOCKMAP, but got accept()'ed only after the parent has been removed
* from SOCKMAP, gets cloned without parent psock state or callbacks.
*/
-static void test_accept_after_delete(int family, int sotype, int mapfd)
+static void test_accept_after_delete(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
struct sockaddr_storage addr;
const u32 zero = 0;
@@ -675,7 +716,8 @@ close_srv:
/* Check that child socket that got created and accepted while parent
* was in a SOCKMAP is cloned without parent psock state or callbacks.
*/
-static void test_accept_before_delete(int family, int sotype, int mapfd)
+static void test_accept_before_delete(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
struct sockaddr_storage addr;
const u32 zero = 0, one = 1;
@@ -784,7 +826,8 @@ done:
return NULL;
}
-static void test_syn_recv_insert_delete(int family, int sotype, int mapfd)
+static void test_syn_recv_insert_delete(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
struct connect_accept_ctx ctx = { 0 };
struct sockaddr_storage addr;
@@ -847,7 +890,8 @@ static void *listen_thread(void *arg)
return NULL;
}
-static void test_race_insert_listen(int family, int socktype, int mapfd)
+static void test_race_insert_listen(struct test_sockmap_listen *skel __always_unused,
+ int family, int socktype, int mapfd)
{
struct connect_accept_ctx ctx = { 0 };
const u32 zero = 0;
@@ -1473,7 +1517,8 @@ static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map,
int family, int sotype)
{
const struct op_test {
- void (*fn)(int family, int sotype, int mapfd);
+ void (*fn)(struct test_sockmap_listen *skel,
+ int family, int sotype, int mapfd);
const char *name;
int sotype;
} tests[] = {
@@ -1520,7 +1565,7 @@ static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map,
if (!test__start_subtest(s))
continue;
- t->fn(family, sotype, map_fd);
+ t->fn(skel, family, sotype, map_fd);
test_ops_cleanup(map);
}
}
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
index b394817126cf..cfed4df490f3 100644
--- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -50,6 +50,12 @@
#define ICSK_TIME_LOSS_PROBE 5
#define ICSK_TIME_REO_TIMEOUT 6
+#define ETH_HLEN 14
+#define ETH_P_IPV6 0x86DD
+
+#define CHECKSUM_NONE 0
+#define CHECKSUM_PARTIAL 3
+
#define IFNAMSIZ 16
#define RTF_GATEWAY 0x0002
diff --git a/tools/testing/selftests/bpf/progs/decap_sanity.c b/tools/testing/selftests/bpf/progs/decap_sanity.c
new file mode 100644
index 000000000000..bd3c657c58a7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/decap_sanity.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define UDP_TEST_PORT 7777
+
+void *bpf_cast_to_kern_ctx(void *) __ksym;
+bool init_csum_partial = false;
+bool final_csum_none = false;
+bool broken_csum_start = false;
+
+static unsigned int skb_headlen(const struct sk_buff *skb)
+{
+ return skb->len - skb->data_len;
+}
+
+static unsigned int skb_headroom(const struct sk_buff *skb)
+{
+ return skb->data - skb->head;
+}
+
+static int skb_checksum_start_offset(const struct sk_buff *skb)
+{
+ return skb->csum_start - skb_headroom(skb);
+}
+
+SEC("tc")
+int decap_sanity(struct __sk_buff *skb)
+{
+ struct sk_buff *kskb;
+ struct ipv6hdr ip6h;
+ struct udphdr udph;
+ int err;
+
+ if (skb->protocol != __bpf_constant_htons(ETH_P_IPV6))
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_load_bytes(skb, ETH_HLEN, &ip6h, sizeof(ip6h)))
+ return TC_ACT_SHOT;
+
+ if (ip6h.nexthdr != IPPROTO_UDP)
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(ip6h), &udph, sizeof(udph)))
+ return TC_ACT_SHOT;
+
+ if (udph.dest != __bpf_constant_htons(UDP_TEST_PORT))
+ return TC_ACT_SHOT;
+
+ kskb = bpf_cast_to_kern_ctx(skb);
+ init_csum_partial = (kskb->ip_summed == CHECKSUM_PARTIAL);
+ err = bpf_skb_adjust_room(skb, -(s32)(ETH_HLEN + sizeof(ip6h) + sizeof(udph)),
+ 1, BPF_F_ADJ_ROOM_FIXED_GSO);
+ if (err)
+ return TC_ACT_SHOT;
+ final_csum_none = (kskb->ip_summed == CHECKSUM_NONE);
+ if (kskb->ip_summed == CHECKSUM_PARTIAL &&
+ (unsigned int)skb_checksum_start_offset(kskb) >= skb_headlen(kskb))
+ broken_csum_start = true;
+
+ return TC_ACT_SHOT;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c b/tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c
new file mode 100644
index 000000000000..f46965053acb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, u64);
+ __type(value, u64);
+} m_hash SEC(".maps");
+
+SEC("?raw_tp")
+__failure __msg("R8 invalid mem access 'map_value_or_null")
+int jeq_infer_not_null_ptr_to_btfid(void *ctx)
+{
+ struct bpf_map *map = (struct bpf_map *)&m_hash;
+ struct bpf_map *inner_map = map->inner_map_meta;
+ u64 key = 0, ret = 0, *val;
+
+ val = bpf_map_lookup_elem(map, &key);
+ /* Do not mark ptr as non-null if one of them is
+ * PTR_TO_BTF_ID (R9), reject because of invalid
+ * access to map value (R8).
+ *
+ * Here, we need to inline those insns to access
+ * R8 directly, since compiler may use other reg
+ * once it figures out val==inner_map.
+ */
+ asm volatile("r8 = %[val];\n"
+ "r9 = %[inner_map];\n"
+ "if r8 != r9 goto +1;\n"
+ "%[ret] = *(u64 *)(r8 +0);\n"
+ : [ret] "+r"(ret)
+ : [inner_map] "r"(inner_map), [val] "r"(val)
+ : "r8", "r9");
+
+ return ret;
+}
diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h
index 92331053dba3..7bd76b9e0f98 100644
--- a/tools/testing/selftests/bpf/progs/profiler.inc.h
+++ b/tools/testing/selftests/bpf/progs/profiler.inc.h
@@ -826,7 +826,7 @@ out:
SEC("kprobe/vfs_link")
int BPF_KPROBE(kprobe__vfs_link,
- struct dentry* old_dentry, struct user_namespace *mnt_userns,
+ struct dentry* old_dentry, struct mnt_idmap *idmap,
struct inode* dir, struct dentry* new_dentry,
struct inode** delegated_inode)
{
diff --git a/tools/testing/selftests/bpf/verifier/search_pruning.c b/tools/testing/selftests/bpf/verifier/search_pruning.c
index 68b14fdfebdb..d63fd8991b03 100644
--- a/tools/testing/selftests/bpf/verifier/search_pruning.c
+++ b/tools/testing/selftests/bpf/verifier/search_pruning.c
@@ -225,3 +225,39 @@
.result_unpriv = ACCEPT,
.insn_processed = 15,
},
+/* The test performs a conditional 64-bit write to a stack location
+ * fp[-8], this is followed by an unconditional 8-bit write to fp[-8],
+ * then data is read from fp[-8]. This sequence is unsafe.
+ *
+ * The test would be mistakenly marked as safe w/o dst register parent
+ * preservation in verifier.c:copy_register_state() function.
+ *
+ * Note the usage of BPF_F_TEST_STATE_FREQ to force creation of the
+ * checkpoint state after conditional 64-bit assignment.
+ */
+{
+ "write tracking and register parent chain bug",
+ .insns = {
+ /* r6 = ktime_get_ns() */
+ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ /* r0 = ktime_get_ns() */
+ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+ /* if r0 > r6 goto +1 */
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_6, 1),
+ /* *(u64 *)(r10 - 8) = 0xdeadbeef */
+ BPF_ST_MEM(BPF_DW, BPF_REG_FP, -8, 0xdeadbeef),
+ /* r1 = 42 */
+ BPF_MOV64_IMM(BPF_REG_1, 42),
+ /* *(u8 *)(r10 - 8) = r1 */
+ BPF_STX_MEM(BPF_B, BPF_REG_FP, BPF_REG_1, -8),
+ /* r2 = *(u64 *)(r10 - 8) */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_FP, -8),
+ /* exit(0) */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .flags = BPF_F_TEST_STATE_FREQ,
+ .errstr = "invalid read from stack off -8+1 size 8",
+ .result = REJECT,
+},
diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
index 186e1c26867e..75c100de90ff 100755
--- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh
+++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
@@ -268,6 +268,7 @@ TEST_MATRIX=(
# Taking away all CPUs from parent or itself if there are tasks
# will make the partition invalid.
" S+ C2-3:P1:S+ C3:P1 . . T C2-3 . . 0 A1:2-3,A2:2-3 A1:P1,A2:P-1"
+ " S+ C3:P1:S+ C3 . . T P1 . . 0 A1:3,A2:3 A1:P1,A2:P-1"
" S+ $SETUP_A123_PARTITIONS . T:C2-3 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1"
" S+ $SETUP_A123_PARTITIONS . T:C2-3:C1-3 . . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
index 9c79bbcce5a8..aff0a59f92d9 100755
--- a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
+++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
@@ -246,7 +246,7 @@ test_vlan_ingress_modify()
bridge vlan add dev $swp2 vid 300
tc filter add dev $swp1 ingress chain $(IS1 2) pref 3 \
- protocol 802.1Q flower skip_sw vlan_id 200 \
+ protocol 802.1Q flower skip_sw vlan_id 200 src_mac $h1_mac \
action vlan modify id 300 \
action goto chain $(IS2 0 0)
diff --git a/tools/testing/selftests/filesystems/fat/run_fat_tests.sh b/tools/testing/selftests/filesystems/fat/run_fat_tests.sh
index 7f35dc3d15df..7f35dc3d15df 100644..100755
--- a/tools/testing/selftests/filesystems/fat/run_fat_tests.sh
+++ b/tools/testing/selftests/filesystems/fat/run_fat_tests.sh
diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
index beb944fa6fd4..54680dc5887f 100644
--- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c
+++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
@@ -237,6 +237,11 @@ static void guest_check_s1ptw_wr_in_dirty_log(void)
GUEST_SYNC(CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG);
}
+static void guest_check_no_s1ptw_wr_in_dirty_log(void)
+{
+ GUEST_SYNC(CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG);
+}
+
static void guest_exec(void)
{
int (*code)(void) = (int (*)(void))TEST_EXEC_GVA;
@@ -304,7 +309,7 @@ static struct uffd_args {
/* Returns true to continue the test, and false if it should be skipped. */
static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg,
- struct uffd_args *args, bool expect_write)
+ struct uffd_args *args)
{
uint64_t addr = msg->arg.pagefault.address;
uint64_t flags = msg->arg.pagefault.flags;
@@ -313,7 +318,6 @@ static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg,
TEST_ASSERT(uffd_mode == UFFDIO_REGISTER_MODE_MISSING,
"The only expected UFFD mode is MISSING");
- ASSERT_EQ(!!(flags & UFFD_PAGEFAULT_FLAG_WRITE), expect_write);
ASSERT_EQ(addr, (uint64_t)args->hva);
pr_debug("uffd fault: addr=%p write=%d\n",
@@ -337,19 +341,14 @@ static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg,
return 0;
}
-static int uffd_pt_write_handler(int mode, int uffd, struct uffd_msg *msg)
-{
- return uffd_generic_handler(mode, uffd, msg, &pt_args, true);
-}
-
-static int uffd_data_write_handler(int mode, int uffd, struct uffd_msg *msg)
+static int uffd_pt_handler(int mode, int uffd, struct uffd_msg *msg)
{
- return uffd_generic_handler(mode, uffd, msg, &data_args, true);
+ return uffd_generic_handler(mode, uffd, msg, &pt_args);
}
-static int uffd_data_read_handler(int mode, int uffd, struct uffd_msg *msg)
+static int uffd_data_handler(int mode, int uffd, struct uffd_msg *msg)
{
- return uffd_generic_handler(mode, uffd, msg, &data_args, false);
+ return uffd_generic_handler(mode, uffd, msg, &data_args);
}
static void setup_uffd_args(struct userspace_mem_region *region,
@@ -471,9 +470,12 @@ static bool handle_cmd(struct kvm_vm *vm, int cmd)
{
struct userspace_mem_region *data_region, *pt_region;
bool continue_test = true;
+ uint64_t pte_gpa, pte_pg;
data_region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
pt_region = vm_get_mem_region(vm, MEM_REGION_PT);
+ pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA));
+ pte_pg = (pte_gpa - pt_region->region.guest_phys_addr) / getpagesize();
if (cmd == CMD_SKIP_TEST)
continue_test = false;
@@ -486,13 +488,13 @@ static bool handle_cmd(struct kvm_vm *vm, int cmd)
TEST_ASSERT(check_write_in_dirty_log(vm, data_region, 0),
"Missing write in dirty log");
if (cmd & CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG)
- TEST_ASSERT(check_write_in_dirty_log(vm, pt_region, 0),
+ TEST_ASSERT(check_write_in_dirty_log(vm, pt_region, pte_pg),
"Missing s1ptw write in dirty log");
if (cmd & CMD_CHECK_NO_WRITE_IN_DIRTY_LOG)
TEST_ASSERT(!check_write_in_dirty_log(vm, data_region, 0),
"Unexpected write in dirty log");
if (cmd & CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG)
- TEST_ASSERT(!check_write_in_dirty_log(vm, pt_region, 0),
+ TEST_ASSERT(!check_write_in_dirty_log(vm, pt_region, pte_pg),
"Unexpected s1ptw write in dirty log");
return continue_test;
@@ -797,7 +799,7 @@ static void help(char *name)
.expected_events = { .uffd_faults = _uffd_faults, }, \
}
-#define TEST_DIRTY_LOG(_access, _with_af, _test_check) \
+#define TEST_DIRTY_LOG(_access, _with_af, _test_check, _pt_check) \
{ \
.name = SCAT3(dirty_log, _access, _with_af), \
.data_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \
@@ -805,13 +807,12 @@ static void help(char *name)
.guest_prepare = { _PREPARE(_with_af), \
_PREPARE(_access) }, \
.guest_test = _access, \
- .guest_test_check = { _CHECK(_with_af), _test_check, \
- guest_check_s1ptw_wr_in_dirty_log}, \
+ .guest_test_check = { _CHECK(_with_af), _test_check, _pt_check }, \
.expected_events = { 0 }, \
}
#define TEST_UFFD_AND_DIRTY_LOG(_access, _with_af, _uffd_data_handler, \
- _uffd_faults, _test_check) \
+ _uffd_faults, _test_check, _pt_check) \
{ \
.name = SCAT3(uffd_and_dirty_log, _access, _with_af), \
.data_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \
@@ -820,16 +821,17 @@ static void help(char *name)
_PREPARE(_access) }, \
.guest_test = _access, \
.mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \
- .guest_test_check = { _CHECK(_with_af), _test_check }, \
+ .guest_test_check = { _CHECK(_with_af), _test_check, _pt_check }, \
.uffd_data_handler = _uffd_data_handler, \
- .uffd_pt_handler = uffd_pt_write_handler, \
+ .uffd_pt_handler = uffd_pt_handler, \
.expected_events = { .uffd_faults = _uffd_faults, }, \
}
#define TEST_RO_MEMSLOT(_access, _mmio_handler, _mmio_exits) \
{ \
- .name = SCAT3(ro_memslot, _access, _with_af), \
+ .name = SCAT2(ro_memslot, _access), \
.data_memslot_flags = KVM_MEM_READONLY, \
+ .pt_memslot_flags = KVM_MEM_READONLY, \
.guest_prepare = { _PREPARE(_access) }, \
.guest_test = _access, \
.mmio_handler = _mmio_handler, \
@@ -840,6 +842,7 @@ static void help(char *name)
{ \
.name = SCAT2(ro_memslot_no_syndrome, _access), \
.data_memslot_flags = KVM_MEM_READONLY, \
+ .pt_memslot_flags = KVM_MEM_READONLY, \
.guest_test = _access, \
.fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \
.expected_events = { .fail_vcpu_runs = 1 }, \
@@ -848,9 +851,9 @@ static void help(char *name)
#define TEST_RO_MEMSLOT_AND_DIRTY_LOG(_access, _mmio_handler, _mmio_exits, \
_test_check) \
{ \
- .name = SCAT3(ro_memslot, _access, _with_af), \
+ .name = SCAT2(ro_memslot, _access), \
.data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \
- .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \
+ .pt_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \
.guest_prepare = { _PREPARE(_access) }, \
.guest_test = _access, \
.guest_test_check = { _test_check }, \
@@ -862,7 +865,7 @@ static void help(char *name)
{ \
.name = SCAT2(ro_memslot_no_syn_and_dlog, _access), \
.data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \
- .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \
+ .pt_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \
.guest_test = _access, \
.guest_test_check = { _test_check }, \
.fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \
@@ -874,11 +877,12 @@ static void help(char *name)
{ \
.name = SCAT2(ro_memslot_uffd, _access), \
.data_memslot_flags = KVM_MEM_READONLY, \
+ .pt_memslot_flags = KVM_MEM_READONLY, \
.mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \
.guest_prepare = { _PREPARE(_access) }, \
.guest_test = _access, \
.uffd_data_handler = _uffd_data_handler, \
- .uffd_pt_handler = uffd_pt_write_handler, \
+ .uffd_pt_handler = uffd_pt_handler, \
.mmio_handler = _mmio_handler, \
.expected_events = { .mmio_exits = _mmio_exits, \
.uffd_faults = _uffd_faults }, \
@@ -889,10 +893,11 @@ static void help(char *name)
{ \
.name = SCAT2(ro_memslot_no_syndrome, _access), \
.data_memslot_flags = KVM_MEM_READONLY, \
+ .pt_memslot_flags = KVM_MEM_READONLY, \
.mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \
.guest_test = _access, \
.uffd_data_handler = _uffd_data_handler, \
- .uffd_pt_handler = uffd_pt_write_handler, \
+ .uffd_pt_handler = uffd_pt_handler, \
.fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \
.expected_events = { .fail_vcpu_runs = 1, \
.uffd_faults = _uffd_faults }, \
@@ -933,44 +938,51 @@ static struct test_desc tests[] = {
* (S1PTW).
*/
TEST_UFFD(guest_read64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
- uffd_data_read_handler, uffd_pt_write_handler, 2),
- /* no_af should also lead to a PT write. */
+ uffd_data_handler, uffd_pt_handler, 2),
TEST_UFFD(guest_read64, no_af, CMD_HOLE_DATA | CMD_HOLE_PT,
- uffd_data_read_handler, uffd_pt_write_handler, 2),
- /* Note how that cas invokes the read handler. */
+ uffd_data_handler, uffd_pt_handler, 2),
TEST_UFFD(guest_cas, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
- uffd_data_read_handler, uffd_pt_write_handler, 2),
+ uffd_data_handler, uffd_pt_handler, 2),
/*
* Can't test guest_at with_af as it's IMPDEF whether the AF is set.
* The S1PTW fault should still be marked as a write.
*/
TEST_UFFD(guest_at, no_af, CMD_HOLE_DATA | CMD_HOLE_PT,
- uffd_data_read_handler, uffd_pt_write_handler, 1),
+ uffd_no_handler, uffd_pt_handler, 1),
TEST_UFFD(guest_ld_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
- uffd_data_read_handler, uffd_pt_write_handler, 2),
+ uffd_data_handler, uffd_pt_handler, 2),
TEST_UFFD(guest_write64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
- uffd_data_write_handler, uffd_pt_write_handler, 2),
+ uffd_data_handler, uffd_pt_handler, 2),
TEST_UFFD(guest_dc_zva, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
- uffd_data_write_handler, uffd_pt_write_handler, 2),
+ uffd_data_handler, uffd_pt_handler, 2),
TEST_UFFD(guest_st_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
- uffd_data_write_handler, uffd_pt_write_handler, 2),
+ uffd_data_handler, uffd_pt_handler, 2),
TEST_UFFD(guest_exec, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
- uffd_data_read_handler, uffd_pt_write_handler, 2),
+ uffd_data_handler, uffd_pt_handler, 2),
/*
* Try accesses when the data and PT memory regions are both
* tracked for dirty logging.
*/
- TEST_DIRTY_LOG(guest_read64, with_af, guest_check_no_write_in_dirty_log),
- /* no_af should also lead to a PT write. */
- TEST_DIRTY_LOG(guest_read64, no_af, guest_check_no_write_in_dirty_log),
- TEST_DIRTY_LOG(guest_ld_preidx, with_af, guest_check_no_write_in_dirty_log),
- TEST_DIRTY_LOG(guest_at, no_af, guest_check_no_write_in_dirty_log),
- TEST_DIRTY_LOG(guest_exec, with_af, guest_check_no_write_in_dirty_log),
- TEST_DIRTY_LOG(guest_write64, with_af, guest_check_write_in_dirty_log),
- TEST_DIRTY_LOG(guest_cas, with_af, guest_check_write_in_dirty_log),
- TEST_DIRTY_LOG(guest_dc_zva, with_af, guest_check_write_in_dirty_log),
- TEST_DIRTY_LOG(guest_st_preidx, with_af, guest_check_write_in_dirty_log),
+ TEST_DIRTY_LOG(guest_read64, with_af, guest_check_no_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_DIRTY_LOG(guest_read64, no_af, guest_check_no_write_in_dirty_log,
+ guest_check_no_s1ptw_wr_in_dirty_log),
+ TEST_DIRTY_LOG(guest_ld_preidx, with_af,
+ guest_check_no_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_DIRTY_LOG(guest_at, no_af, guest_check_no_write_in_dirty_log,
+ guest_check_no_s1ptw_wr_in_dirty_log),
+ TEST_DIRTY_LOG(guest_exec, with_af, guest_check_no_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_DIRTY_LOG(guest_write64, with_af, guest_check_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_DIRTY_LOG(guest_cas, with_af, guest_check_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_DIRTY_LOG(guest_dc_zva, with_af, guest_check_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_DIRTY_LOG(guest_st_preidx, with_af, guest_check_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
/*
* Access when the data and PT memory regions are both marked for
@@ -980,29 +992,43 @@ static struct test_desc tests[] = {
* fault, and nothing in the dirty log. Any S1PTW should result in
* a write in the dirty log and a userfaultfd write.
*/
- TEST_UFFD_AND_DIRTY_LOG(guest_read64, with_af, uffd_data_read_handler, 2,
- guest_check_no_write_in_dirty_log),
- /* no_af should also lead to a PT write. */
- TEST_UFFD_AND_DIRTY_LOG(guest_read64, no_af, uffd_data_read_handler, 2,
- guest_check_no_write_in_dirty_log),
- TEST_UFFD_AND_DIRTY_LOG(guest_ld_preidx, with_af, uffd_data_read_handler,
- 2, guest_check_no_write_in_dirty_log),
- TEST_UFFD_AND_DIRTY_LOG(guest_at, with_af, 0, 1,
- guest_check_no_write_in_dirty_log),
- TEST_UFFD_AND_DIRTY_LOG(guest_exec, with_af, uffd_data_read_handler, 2,
- guest_check_no_write_in_dirty_log),
- TEST_UFFD_AND_DIRTY_LOG(guest_write64, with_af, uffd_data_write_handler,
- 2, guest_check_write_in_dirty_log),
- TEST_UFFD_AND_DIRTY_LOG(guest_cas, with_af, uffd_data_read_handler, 2,
- guest_check_write_in_dirty_log),
- TEST_UFFD_AND_DIRTY_LOG(guest_dc_zva, with_af, uffd_data_write_handler,
- 2, guest_check_write_in_dirty_log),
+ TEST_UFFD_AND_DIRTY_LOG(guest_read64, with_af,
+ uffd_data_handler, 2,
+ guest_check_no_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_UFFD_AND_DIRTY_LOG(guest_read64, no_af,
+ uffd_data_handler, 2,
+ guest_check_no_write_in_dirty_log,
+ guest_check_no_s1ptw_wr_in_dirty_log),
+ TEST_UFFD_AND_DIRTY_LOG(guest_ld_preidx, with_af,
+ uffd_data_handler,
+ 2, guest_check_no_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_UFFD_AND_DIRTY_LOG(guest_at, with_af, uffd_no_handler, 1,
+ guest_check_no_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_UFFD_AND_DIRTY_LOG(guest_exec, with_af,
+ uffd_data_handler, 2,
+ guest_check_no_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_UFFD_AND_DIRTY_LOG(guest_write64, with_af,
+ uffd_data_handler,
+ 2, guest_check_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_UFFD_AND_DIRTY_LOG(guest_cas, with_af,
+ uffd_data_handler, 2,
+ guest_check_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_UFFD_AND_DIRTY_LOG(guest_dc_zva, with_af,
+ uffd_data_handler,
+ 2, guest_check_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
TEST_UFFD_AND_DIRTY_LOG(guest_st_preidx, with_af,
- uffd_data_write_handler, 2,
- guest_check_write_in_dirty_log),
-
+ uffd_data_handler, 2,
+ guest_check_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
/*
- * Try accesses when the data memory region is marked read-only
+ * Access when both the PT and data regions are marked read-only
* (with KVM_MEM_READONLY). Writes with a syndrome result in an
* MMIO exit, writes with no syndrome (e.g., CAS) result in a
* failed vcpu run, and reads/execs with and without syndroms do
@@ -1018,7 +1044,7 @@ static struct test_desc tests[] = {
TEST_RO_MEMSLOT_NO_SYNDROME(guest_st_preidx),
/*
- * Access when both the data region is both read-only and marked
+ * The PT and data regions are both read-only and marked
* for dirty logging at the same time. The expected result is that
* for writes there should be no write in the dirty log. The
* readonly handling is the same as if the memslot was not marked
@@ -1043,7 +1069,7 @@ static struct test_desc tests[] = {
guest_check_no_write_in_dirty_log),
/*
- * Access when the data region is both read-only and punched with
+ * The PT and data regions are both read-only and punched with
* holes tracked with userfaultfd. The expected result is the
* union of both userfaultfd and read-only behaviors. For example,
* write accesses result in a userfaultfd write fault and an MMIO
@@ -1051,22 +1077,15 @@ static struct test_desc tests[] = {
* no userfaultfd write fault. Reads result in userfaultfd getting
* triggered.
*/
- TEST_RO_MEMSLOT_AND_UFFD(guest_read64, 0, 0,
- uffd_data_read_handler, 2),
- TEST_RO_MEMSLOT_AND_UFFD(guest_ld_preidx, 0, 0,
- uffd_data_read_handler, 2),
- TEST_RO_MEMSLOT_AND_UFFD(guest_at, 0, 0,
- uffd_no_handler, 1),
- TEST_RO_MEMSLOT_AND_UFFD(guest_exec, 0, 0,
- uffd_data_read_handler, 2),
+ TEST_RO_MEMSLOT_AND_UFFD(guest_read64, 0, 0, uffd_data_handler, 2),
+ TEST_RO_MEMSLOT_AND_UFFD(guest_ld_preidx, 0, 0, uffd_data_handler, 2),
+ TEST_RO_MEMSLOT_AND_UFFD(guest_at, 0, 0, uffd_no_handler, 1),
+ TEST_RO_MEMSLOT_AND_UFFD(guest_exec, 0, 0, uffd_data_handler, 2),
TEST_RO_MEMSLOT_AND_UFFD(guest_write64, mmio_on_test_gpa_handler, 1,
- uffd_data_write_handler, 2),
- TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_cas,
- uffd_data_read_handler, 2),
- TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_dc_zva,
- uffd_no_handler, 1),
- TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_st_preidx,
- uffd_no_handler, 1),
+ uffd_data_handler, 2),
+ TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_cas, uffd_data_handler, 2),
+ TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_dc_zva, uffd_no_handler, 1),
+ TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_st_preidx, uffd_no_handler, 1),
{ 0 }
};
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
index ea0978f22db8..251794f83719 100644
--- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
+++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
@@ -241,7 +241,7 @@ int main(int argc, char **argv)
while ((opt = getopt(argc, argv, "hp:t:r")) != -1) {
switch (opt) {
case 'p':
- reclaim_period_ms = atoi_non_negative("Reclaim period", optarg);
+ reclaim_period_ms = atoi_positive("Reclaim period", optarg);
break;
case 't':
token = atoi_paranoid(optarg);
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index dae510c263b4..13c75dc18c10 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -434,6 +434,7 @@ static void *juggle_shinfo_state(void *arg)
int main(int argc, char *argv[])
{
struct timespec min_ts, max_ts, vm_ts;
+ struct kvm_xen_hvm_attr evt_reset;
struct kvm_vm *vm;
pthread_t thread;
bool verbose;
@@ -962,10 +963,8 @@ int main(int argc, char *argv[])
}
done:
- struct kvm_xen_hvm_attr evt_reset = {
- .type = KVM_XEN_ATTR_TYPE_EVTCHN,
- .u.evtchn.flags = KVM_XEN_EVTCHN_RESET,
- };
+ evt_reset.type = KVM_XEN_ATTR_TYPE_EVTCHN;
+ evt_reset.u.evtchn.flags = KVM_XEN_EVTCHN_RESET;
vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &evt_reset);
alarm(0);
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 291144c284fb..f7900e75d230 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -20,7 +20,7 @@ CLANG_TARGET_FLAGS := $(CLANG_TARGET_FLAGS_$(ARCH))
ifeq ($(CROSS_COMPILE),)
ifeq ($(CLANG_TARGET_FLAGS),)
-$(error Specify CROSS_COMPILE or add '--target=' option to lib.mk
+$(error Specify CROSS_COMPILE or add '--target=' option to lib.mk)
else
CLANG_FLAGS += --target=$(CLANG_TARGET_FLAGS)
endif # CLANG_TARGET_FLAGS
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 9cc84114741d..a6911cae368c 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
bind_bhash
+bind_timewait
csum
cmsg_sender
diag_uid
diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c
index b57e91e1c3f2..532459a15067 100644
--- a/tools/testing/selftests/net/af_unix/test_unix_oob.c
+++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c
@@ -124,7 +124,7 @@ void producer(struct sockaddr_un *consumer_addr)
wait_for_signal(pipefd[0]);
if (connect(cfd, (struct sockaddr *)consumer_addr,
- sizeof(struct sockaddr)) != 0) {
+ sizeof(*consumer_addr)) != 0) {
perror("Connect failed");
kill(0, SIGTERM);
exit(1);
diff --git a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh
index b5af08af8559..4a110bb01e53 100755
--- a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh
+++ b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh
@@ -18,14 +18,15 @@ readonly V4_ADDR1=10.0.10.2
readonly V6_ADDR0=2001:db8:91::1
readonly V6_ADDR1=2001:db8:91::2
nsid=100
+ret=0
cleanup_v6()
{
ip netns del me
ip netns del peer
- sysctl -w net.ipv4.conf.veth0.ndisc_evict_nocarrier=1 >/dev/null 2>&1
- sysctl -w net.ipv4.conf.all.ndisc_evict_nocarrier=1 >/dev/null 2>&1
+ sysctl -w net.ipv6.conf.veth1.ndisc_evict_nocarrier=1 >/dev/null 2>&1
+ sysctl -w net.ipv6.conf.all.ndisc_evict_nocarrier=1 >/dev/null 2>&1
}
create_ns()
@@ -61,7 +62,7 @@ setup_v6() {
if [ $? -ne 0 ]; then
cleanup_v6
echo "failed"
- exit
+ exit 1
fi
# Set veth2 down, which will put veth1 in NOCARRIER state
@@ -88,7 +89,7 @@ setup_v4() {
if [ $? -ne 0 ]; then
cleanup_v4
echo "failed"
- exit
+ exit 1
fi
# Set veth1 down, which will put veth0 in NOCARRIER state
@@ -115,6 +116,7 @@ run_arp_evict_nocarrier_enabled() {
if [ $? -eq 0 ];then
echo "failed"
+ ret=1
else
echo "ok"
fi
@@ -134,6 +136,7 @@ run_arp_evict_nocarrier_disabled() {
echo "ok"
else
echo "failed"
+ ret=1
fi
cleanup_v4
@@ -164,6 +167,7 @@ run_ndisc_evict_nocarrier_enabled() {
if [ $? -eq 0 ];then
echo "failed"
+ ret=1
else
echo "ok"
fi
@@ -182,6 +186,7 @@ run_ndisc_evict_nocarrier_disabled() {
echo "ok"
else
echo "failed"
+ ret=1
fi
cleanup_v6
@@ -198,6 +203,7 @@ run_ndisc_evict_nocarrier_disabled_all() {
echo "ok"
else
echo "failed"
+ ret=1
fi
cleanup_v6
@@ -218,3 +224,4 @@ if [ "$(id -u)" -ne 0 ];then
fi
run_all_tests
+exit $ret
diff --git a/tools/testing/selftests/net/bind_timewait.c b/tools/testing/selftests/net/bind_timewait.c
new file mode 100644
index 000000000000..cb9fdf51ea59
--- /dev/null
+++ b/tools/testing/selftests/net/bind_timewait.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#include "../kselftest_harness.h"
+
+FIXTURE(bind_timewait)
+{
+ struct sockaddr_in addr;
+ socklen_t addrlen;
+};
+
+FIXTURE_VARIANT(bind_timewait)
+{
+ __u32 addr_const;
+};
+
+FIXTURE_VARIANT_ADD(bind_timewait, localhost)
+{
+ .addr_const = INADDR_LOOPBACK
+};
+
+FIXTURE_VARIANT_ADD(bind_timewait, addrany)
+{
+ .addr_const = INADDR_ANY
+};
+
+FIXTURE_SETUP(bind_timewait)
+{
+ self->addr.sin_family = AF_INET;
+ self->addr.sin_port = 0;
+ self->addr.sin_addr.s_addr = htonl(variant->addr_const);
+ self->addrlen = sizeof(self->addr);
+}
+
+FIXTURE_TEARDOWN(bind_timewait)
+{
+}
+
+void create_timewait_socket(struct __test_metadata *_metadata,
+ FIXTURE_DATA(bind_timewait) *self)
+{
+ int server_fd, client_fd, child_fd, ret;
+ struct sockaddr_in addr;
+ socklen_t addrlen;
+
+ server_fd = socket(AF_INET, SOCK_STREAM, 0);
+ ASSERT_GT(server_fd, 0);
+
+ ret = bind(server_fd, (struct sockaddr *)&self->addr, self->addrlen);
+ ASSERT_EQ(ret, 0);
+
+ ret = listen(server_fd, 1);
+ ASSERT_EQ(ret, 0);
+
+ ret = getsockname(server_fd, (struct sockaddr *)&self->addr, &self->addrlen);
+ ASSERT_EQ(ret, 0);
+
+ client_fd = socket(AF_INET, SOCK_STREAM, 0);
+ ASSERT_GT(client_fd, 0);
+
+ ret = connect(client_fd, (struct sockaddr *)&self->addr, self->addrlen);
+ ASSERT_EQ(ret, 0);
+
+ addrlen = sizeof(addr);
+ child_fd = accept(server_fd, (struct sockaddr *)&addr, &addrlen);
+ ASSERT_GT(child_fd, 0);
+
+ close(child_fd);
+ close(client_fd);
+ close(server_fd);
+}
+
+TEST_F(bind_timewait, 1)
+{
+ int fd, ret;
+
+ create_timewait_socket(_metadata, self);
+
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ ASSERT_GT(fd, 0);
+
+ ret = bind(fd, (struct sockaddr *)&self->addr, self->addrlen);
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, EADDRINUSE);
+
+ close(fd);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/cmsg_ipv6.sh b/tools/testing/selftests/net/cmsg_ipv6.sh
index 2d89cb0ad288..330d0b1ceced 100755
--- a/tools/testing/selftests/net/cmsg_ipv6.sh
+++ b/tools/testing/selftests/net/cmsg_ipv6.sh
@@ -6,7 +6,7 @@ ksft_skip=4
NS=ns
IP6=2001:db8:1::1/64
TGT6=2001:db8:1::2
-TMPF=`mktemp`
+TMPF=$(mktemp --suffix ".pcap")
cleanup()
{
diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
index 75dd83e39207..24b21b15ed3f 100644
--- a/tools/testing/selftests/net/cmsg_sender.c
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -110,7 +110,7 @@ static void __attribute__((noreturn)) cs_usage(const char *bin)
static void cs_parse_args(int argc, char *argv[])
{
- char o;
+ int o;
while ((o = getopt(argc, argv, "46sS:p:m:M:d:tf:F:c:C:l:L:H:")) != -1) {
switch (o) {
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index c245476fa29d..63c3eaec8d30 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -10,8 +10,10 @@ ret=0
PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
IP="ip -netns testns"
+IP_PEER="ip -netns peerns"
RTABLE=100
+RTABLE_PEER=101
GW_IP4=192.51.100.2
SRC_IP=192.51.100.3
GW_IP6=2001:db8:1::2
@@ -20,7 +22,9 @@ SRC_IP6=2001:db8:1::3
DEV_ADDR=192.51.100.1
DEV_ADDR6=2001:db8:1::1
DEV=dummy0
-TESTS="fib_rule6 fib_rule4"
+TESTS="fib_rule6 fib_rule4 fib_rule6_connect fib_rule4_connect"
+
+SELFTEST_PATH=""
log_test()
{
@@ -52,6 +56,31 @@ log_section()
echo "######################################################################"
}
+check_nettest()
+{
+ if which nettest > /dev/null 2>&1; then
+ return 0
+ fi
+
+ # Add the selftest directory to PATH if not already done
+ if [ "${SELFTEST_PATH}" = "" ]; then
+ SELFTEST_PATH="$(dirname $0)"
+ PATH="${PATH}:${SELFTEST_PATH}"
+
+ # Now retry with the new path
+ if which nettest > /dev/null 2>&1; then
+ return 0
+ fi
+
+ if [ "${ret}" -eq 0 ]; then
+ ret="${ksft_skip}"
+ fi
+ echo "nettest not found (try 'make -C ${SELFTEST_PATH} nettest')"
+ fi
+
+ return 1
+}
+
setup()
{
set -e
@@ -72,6 +101,39 @@ cleanup()
ip netns del testns
}
+setup_peer()
+{
+ set -e
+
+ ip netns add peerns
+ $IP_PEER link set dev lo up
+
+ ip link add name veth0 netns testns type veth \
+ peer name veth1 netns peerns
+ $IP link set dev veth0 up
+ $IP_PEER link set dev veth1 up
+
+ $IP address add 192.0.2.10 peer 192.0.2.11/32 dev veth0
+ $IP_PEER address add 192.0.2.11 peer 192.0.2.10/32 dev veth1
+
+ $IP address add 2001:db8::10 peer 2001:db8::11/128 dev veth0 nodad
+ $IP_PEER address add 2001:db8::11 peer 2001:db8::10/128 dev veth1 nodad
+
+ $IP_PEER address add 198.51.100.11/32 dev lo
+ $IP route add table $RTABLE_PEER 198.51.100.11/32 via 192.0.2.11
+
+ $IP_PEER address add 2001:db8::1:11/128 dev lo
+ $IP route add table $RTABLE_PEER 2001:db8::1:11/128 via 2001:db8::11
+
+ set +e
+}
+
+cleanup_peer()
+{
+ $IP link del dev veth0
+ ip netns del peerns
+}
+
fib_check_iproute_support()
{
ip rule help 2>&1 | grep -q $1
@@ -190,6 +252,37 @@ fib_rule6_test()
fi
}
+# Verify that the IPV6_TCLASS option of UDPv6 and TCPv6 sockets is properly
+# taken into account when connecting the socket and when sending packets.
+fib_rule6_connect_test()
+{
+ local dsfield
+
+ if ! check_nettest; then
+ echo "SKIP: Could not run test without nettest tool"
+ return
+ fi
+
+ setup_peer
+ $IP -6 rule add dsfield 0x04 table $RTABLE_PEER
+
+ # Combine the base DS Field value (0x04) with all possible ECN values
+ # (Not-ECT: 0, ECT(1): 1, ECT(0): 2, CE: 3).
+ # The ECN bits shouldn't influence the result of the test.
+ for dsfield in 0x04 0x05 0x06 0x07; do
+ nettest -q -6 -B -t 5 -N testns -O peerns -U -D \
+ -Q "${dsfield}" -l 2001:db8::1:11 -r 2001:db8::1:11
+ log_test $? 0 "rule6 dsfield udp connect (dsfield ${dsfield})"
+
+ nettest -q -6 -B -t 5 -N testns -O peerns -Q "${dsfield}" \
+ -l 2001:db8::1:11 -r 2001:db8::1:11
+ log_test $? 0 "rule6 dsfield tcp connect (dsfield ${dsfield})"
+ done
+
+ $IP -6 rule del dsfield 0x04 table $RTABLE_PEER
+ cleanup_peer
+}
+
fib_rule4_del()
{
$IP rule del $1
@@ -296,6 +389,37 @@ fib_rule4_test()
fi
}
+# Verify that the IP_TOS option of UDPv4 and TCPv4 sockets is properly taken
+# into account when connecting the socket and when sending packets.
+fib_rule4_connect_test()
+{
+ local dsfield
+
+ if ! check_nettest; then
+ echo "SKIP: Could not run test without nettest tool"
+ return
+ fi
+
+ setup_peer
+ $IP -4 rule add dsfield 0x04 table $RTABLE_PEER
+
+ # Combine the base DS Field value (0x04) with all possible ECN values
+ # (Not-ECT: 0, ECT(1): 1, ECT(0): 2, CE: 3).
+ # The ECN bits shouldn't influence the result of the test.
+ for dsfield in 0x04 0x05 0x06 0x07; do
+ nettest -q -B -t 5 -N testns -O peerns -D -U -Q "${dsfield}" \
+ -l 198.51.100.11 -r 198.51.100.11
+ log_test $? 0 "rule4 dsfield udp connect (dsfield ${dsfield})"
+
+ nettest -q -B -t 5 -N testns -O peerns -Q "${dsfield}" \
+ -l 198.51.100.11 -r 198.51.100.11
+ log_test $? 0 "rule4 dsfield tcp connect (dsfield ${dsfield})"
+ done
+
+ $IP -4 rule del dsfield 0x04 table $RTABLE_PEER
+ cleanup_peer
+}
+
run_fibrule_tests()
{
log_section "IPv4 fib rule"
@@ -345,6 +469,8 @@ do
case $t in
fib_rule6_test|fib_rule6) fib_rule6_test;;
fib_rule4_test|fib_rule4) fib_rule4_test;;
+ fib_rule6_connect_test|fib_rule6_connect) fib_rule6_connect_test;;
+ fib_rule4_connect_test|fib_rule4_connect) fib_rule4_connect_test;;
help) echo "Test names: $TESTS"; exit 0;;
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 1c4f866de7d7..3d8e4ebda1b6 100755
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -914,14 +914,14 @@ sysctl_set()
local value=$1; shift
SYSCTL_ORIG[$key]=$(sysctl -n $key)
- sysctl -qw $key=$value
+ sysctl -qw $key="$value"
}
sysctl_restore()
{
local key=$1; shift
- sysctl -qw $key=${SYSCTL_ORIG["$key"]}
+ sysctl -qw $key="${SYSCTL_ORIG[$key]}"
}
forwarding_enable()
diff --git a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh
index dca1e6f777a8..f11756e7df2f 100755
--- a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh
+++ b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh
@@ -12,19 +12,27 @@
# In addition this script also checks if forcing a specific field in the
# outer header is working.
+# Return 4 by default (Kselftest SKIP code)
+ERR=4
+
if [ "$(id -u)" != "0" ]; then
echo "Please run as root."
- exit 0
+ exit $ERR
fi
if ! which tcpdump > /dev/null 2>&1; then
echo "No tcpdump found. Required for this test."
- exit 0
+ exit $ERR
fi
expected_tos="0x00"
expected_ttl="0"
failed=false
+readonly NS0=$(mktemp -u ns0-XXXXXXXX)
+readonly NS1=$(mktemp -u ns1-XXXXXXXX)
+
+RUN_NS0="ip netns exec ${NS0}"
+
get_random_tos() {
# Get a random hex tos value between 0x00 and 0xfc, a multiple of 4
echo "0x$(tr -dc '0-9a-f' < /dev/urandom | head -c 1)\
@@ -61,7 +69,6 @@ setup() {
local vlan="$5"
local test_tos="0x00"
local test_ttl="0"
- local ns="ip netns exec testing"
# We don't want a test-tos of 0x00,
# because this is the value that we get when no tos is set.
@@ -94,14 +101,15 @@ setup() {
printf "│%7s │%6s │%6s │%13s │%13s │%6s │" \
"$type" "$outer" "$inner" "$tos" "$ttl" "$vlan"
- # Create 'testing' netns, veth pair and connect main ns with testing ns
- ip netns add testing
- ip link add type veth
- ip link set veth1 netns testing
- ip link set veth0 up
- $ns ip link set veth1 up
- ip addr flush dev veth0
- $ns ip addr flush dev veth1
+ # Create netns NS0 and NS1 and connect them with a veth pair
+ ip netns add "${NS0}"
+ ip netns add "${NS1}"
+ ip link add name veth0 netns "${NS0}" type veth \
+ peer name veth1 netns "${NS1}"
+ ip -netns "${NS0}" link set dev veth0 up
+ ip -netns "${NS1}" link set dev veth1 up
+ ip -netns "${NS0}" address flush dev veth0
+ ip -netns "${NS1}" address flush dev veth1
local local_addr1=""
local local_addr2=""
@@ -127,51 +135,59 @@ setup() {
if [ "$type" = "gre" ]; then
type="gretap"
fi
- ip addr add 198.18.0.1/24 dev veth0
- $ns ip addr add 198.18.0.2/24 dev veth1
- ip link add name tep0 type $type $local_addr1 remote \
- 198.18.0.2 tos $test_tos ttl $test_ttl $vxlan $geneve
- $ns ip link add name tep1 type $type $local_addr2 remote \
- 198.18.0.1 tos $test_tos ttl $test_ttl $vxlan $geneve
+ ip -netns "${NS0}" address add 198.18.0.1/24 dev veth0
+ ip -netns "${NS1}" address add 198.18.0.2/24 dev veth1
+ ip -netns "${NS0}" link add name tep0 type $type $local_addr1 \
+ remote 198.18.0.2 tos $test_tos ttl $test_ttl \
+ $vxlan $geneve
+ ip -netns "${NS1}" link add name tep1 type $type $local_addr2 \
+ remote 198.18.0.1 tos $test_tos ttl $test_ttl \
+ $vxlan $geneve
elif [ "$outer" = "6" ]; then
if [ "$type" = "gre" ]; then
type="ip6gretap"
fi
- ip addr add fdd1:ced0:5d88:3fce::1/64 dev veth0
- $ns ip addr add fdd1:ced0:5d88:3fce::2/64 dev veth1
- ip link add name tep0 type $type $local_addr1 \
- remote fdd1:ced0:5d88:3fce::2 tos $test_tos ttl $test_ttl \
- $vxlan $geneve
- $ns ip link add name tep1 type $type $local_addr2 \
- remote fdd1:ced0:5d88:3fce::1 tos $test_tos ttl $test_ttl \
- $vxlan $geneve
+ ip -netns "${NS0}" address add fdd1:ced0:5d88:3fce::1/64 \
+ dev veth0 nodad
+ ip -netns "${NS1}" address add fdd1:ced0:5d88:3fce::2/64 \
+ dev veth1 nodad
+ ip -netns "${NS0}" link add name tep0 type $type $local_addr1 \
+ remote fdd1:ced0:5d88:3fce::2 tos $test_tos \
+ ttl $test_ttl $vxlan $geneve
+ ip -netns "${NS1}" link add name tep1 type $type $local_addr2 \
+ remote fdd1:ced0:5d88:3fce::1 tos $test_tos \
+ ttl $test_ttl $vxlan $geneve
fi
# Bring L2-tunnel link up and create VLAN on top
- ip link set tep0 up
- $ns ip link set tep1 up
- ip addr flush dev tep0
- $ns ip addr flush dev tep1
+ ip -netns "${NS0}" link set tep0 up
+ ip -netns "${NS1}" link set tep1 up
+ ip -netns "${NS0}" address flush dev tep0
+ ip -netns "${NS1}" address flush dev tep1
local parent
if $vlan; then
parent="vlan99-"
- ip link add link tep0 name ${parent}0 type vlan id 99
- $ns ip link add link tep1 name ${parent}1 type vlan id 99
- ip link set ${parent}0 up
- $ns ip link set ${parent}1 up
- ip addr flush dev ${parent}0
- $ns ip addr flush dev ${parent}1
+ ip -netns "${NS0}" link add link tep0 name ${parent}0 \
+ type vlan id 99
+ ip -netns "${NS1}" link add link tep1 name ${parent}1 \
+ type vlan id 99
+ ip -netns "${NS0}" link set dev ${parent}0 up
+ ip -netns "${NS1}" link set dev ${parent}1 up
+ ip -netns "${NS0}" address flush dev ${parent}0
+ ip -netns "${NS1}" address flush dev ${parent}1
else
parent="tep"
fi
# Assign inner IPv4/IPv6 addresses
if [ "$inner" = "4" ] || [ "$inner" = "other" ]; then
- ip addr add 198.19.0.1/24 brd + dev ${parent}0
- $ns ip addr add 198.19.0.2/24 brd + dev ${parent}1
+ ip -netns "${NS0}" address add 198.19.0.1/24 brd + dev ${parent}0
+ ip -netns "${NS1}" address add 198.19.0.2/24 brd + dev ${parent}1
elif [ "$inner" = "6" ]; then
- ip addr add fdd4:96cf:4eae:443b::1/64 dev ${parent}0
- $ns ip addr add fdd4:96cf:4eae:443b::2/64 dev ${parent}1
+ ip -netns "${NS0}" address add fdd4:96cf:4eae:443b::1/64 \
+ dev ${parent}0 nodad
+ ip -netns "${NS1}" address add fdd4:96cf:4eae:443b::2/64 \
+ dev ${parent}1 nodad
fi
}
@@ -192,10 +208,10 @@ verify() {
ping_dst="198.19.0.3" # Generates ARPs which are not IPv4/IPv6
fi
if [ "$tos_ttl" = "inherit" ]; then
- ping -i 0.1 $ping_dst -Q "$expected_tos" -t "$expected_ttl" \
- 2>/dev/null 1>&2 & ping_pid="$!"
+ ${RUN_NS0} ping -i 0.1 $ping_dst -Q "$expected_tos" \
+ -t "$expected_ttl" 2>/dev/null 1>&2 & ping_pid="$!"
else
- ping -i 0.1 $ping_dst 2>/dev/null 1>&2 & ping_pid="$!"
+ ${RUN_NS0} ping -i 0.1 $ping_dst 2>/dev/null 1>&2 & ping_pid="$!"
fi
local tunnel_type_offset tunnel_type_proto req_proto_offset req_offset
if [ "$type" = "gre" ]; then
@@ -216,10 +232,12 @@ verify() {
req_proto_offset="$((req_proto_offset + 4))"
req_offset="$((req_offset + 4))"
fi
- out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \
- ip[$tunnel_type_offset] = $tunnel_type_proto and \
- ip[$req_proto_offset] = 0x01 and \
- ip[$req_offset] = 0x08 2>/dev/null | head -n 1)"
+ out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \
+ -i veth0 -n \
+ ip[$tunnel_type_offset] = $tunnel_type_proto and \
+ ip[$req_proto_offset] = 0x01 and \
+ ip[$req_offset] = 0x08 2>/dev/null \
+ | head -n 1)"
elif [ "$inner" = "6" ]; then
req_proto_offset="44"
req_offset="78"
@@ -231,10 +249,12 @@ verify() {
req_proto_offset="$((req_proto_offset + 4))"
req_offset="$((req_offset + 4))"
fi
- out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \
- ip[$tunnel_type_offset] = $tunnel_type_proto and \
- ip[$req_proto_offset] = 0x3a and \
- ip[$req_offset] = 0x80 2>/dev/null | head -n 1)"
+ out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \
+ -i veth0 -n \
+ ip[$tunnel_type_offset] = $tunnel_type_proto and \
+ ip[$req_proto_offset] = 0x3a and \
+ ip[$req_offset] = 0x80 2>/dev/null \
+ | head -n 1)"
elif [ "$inner" = "other" ]; then
req_proto_offset="36"
req_offset="45"
@@ -250,11 +270,13 @@ verify() {
expected_tos="0x00"
expected_ttl="64"
fi
- out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \
- ip[$tunnel_type_offset] = $tunnel_type_proto and \
- ip[$req_proto_offset] = 0x08 and \
- ip[$((req_proto_offset + 1))] = 0x06 and \
- ip[$req_offset] = 0x01 2>/dev/null | head -n 1)"
+ out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \
+ -i veth0 -n \
+ ip[$tunnel_type_offset] = $tunnel_type_proto and \
+ ip[$req_proto_offset] = 0x08 and \
+ ip[$((req_proto_offset + 1))] = 0x06 and \
+ ip[$req_offset] = 0x01 2>/dev/null \
+ | head -n 1)"
fi
elif [ "$outer" = "6" ]; then
if [ "$type" = "gre" ]; then
@@ -273,10 +295,12 @@ verify() {
req_proto_offset="$((req_proto_offset + 4))"
req_offset="$((req_offset + 4))"
fi
- out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \
- ip6[$tunnel_type_offset] = $tunnel_type_proto and \
- ip6[$req_proto_offset] = 0x01 and \
- ip6[$req_offset] = 0x08 2>/dev/null | head -n 1)"
+ out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \
+ -i veth0 -n \
+ ip6[$tunnel_type_offset] = $tunnel_type_proto and \
+ ip6[$req_proto_offset] = 0x01 and \
+ ip6[$req_offset] = 0x08 2>/dev/null \
+ | head -n 1)"
elif [ "$inner" = "6" ]; then
local req_proto_offset="72"
local req_offset="106"
@@ -288,10 +312,12 @@ verify() {
req_proto_offset="$((req_proto_offset + 4))"
req_offset="$((req_offset + 4))"
fi
- out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \
- ip6[$tunnel_type_offset] = $tunnel_type_proto and \
- ip6[$req_proto_offset] = 0x3a and \
- ip6[$req_offset] = 0x80 2>/dev/null | head -n 1)"
+ out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \
+ -i veth0 -n \
+ ip6[$tunnel_type_offset] = $tunnel_type_proto and \
+ ip6[$req_proto_offset] = 0x3a and \
+ ip6[$req_offset] = 0x80 2>/dev/null \
+ | head -n 1)"
elif [ "$inner" = "other" ]; then
local req_proto_offset="64"
local req_offset="73"
@@ -307,15 +333,17 @@ verify() {
expected_tos="0x00"
expected_ttl="64"
fi
- out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \
- ip6[$tunnel_type_offset] = $tunnel_type_proto and \
- ip6[$req_proto_offset] = 0x08 and \
- ip6[$((req_proto_offset + 1))] = 0x06 and \
- ip6[$req_offset] = 0x01 2>/dev/null | head -n 1)"
+ out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \
+ -i veth0 -n \
+ ip6[$tunnel_type_offset] = $tunnel_type_proto and \
+ ip6[$req_proto_offset] = 0x08 and \
+ ip6[$((req_proto_offset + 1))] = 0x06 and \
+ ip6[$req_offset] = 0x01 2>/dev/null \
+ | head -n 1)"
fi
fi
kill -9 $ping_pid
- wait $ping_pid 2>/dev/null
+ wait $ping_pid 2>/dev/null || true
result="FAIL"
if [ "$outer" = "4" ]; then
captured_ttl="$(get_field "ttl" "$out")"
@@ -351,11 +379,35 @@ verify() {
}
cleanup() {
- ip link del veth0 2>/dev/null
- ip netns del testing 2>/dev/null
- ip link del tep0 2>/dev/null
+ ip netns del "${NS0}" 2>/dev/null
+ ip netns del "${NS1}" 2>/dev/null
}
+exit_handler() {
+ # Don't exit immediately if one of the intermediate commands fails.
+ # We might be called at the end of the script, when the network
+ # namespaces have already been deleted. So cleanup() may fail, but we
+ # still need to run until 'exit $ERR' or the script won't return the
+ # correct error code.
+ set +e
+
+ cleanup
+
+ exit $ERR
+}
+
+# Restore the default SIGINT handler (just in case) and exit.
+# The exit handler will take care of cleaning everything up.
+interrupted() {
+ trap - INT
+
+ exit $ERR
+}
+
+set -e
+trap exit_handler EXIT
+trap interrupted INT
+
printf "┌────────┬───────┬───────┬──────────────┬"
printf "──────────────┬───────┬────────┐\n"
for type in gre vxlan geneve; do
@@ -385,6 +437,10 @@ done
printf "└────────┴───────┴───────┴──────────────┴"
printf "──────────────┴───────┴────────┘\n"
+# All tests done.
+# Set ERR appropriately: it will be returned by the exit handler.
if $failed; then
- exit 1
+ ERR=1
+else
+ ERR=0
fi
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index d11d3d566608..079f8f46849d 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -498,6 +498,12 @@ kill_events_pids()
kill_wait $evts_ns2_pid
}
+kill_tests_wait()
+{
+ kill -SIGUSR1 $(ip netns pids $ns2) $(ip netns pids $ns1)
+ wait
+}
+
pm_nl_set_limits()
{
local ns=$1
@@ -1694,6 +1700,7 @@ chk_subflow_nr()
local subflow_nr=$3
local cnt1
local cnt2
+ local dump_stats
if [ -n "${need_title}" ]; then
printf "%03u %-36s %s" "${TEST_COUNT}" "${TEST_NAME}" "${msg}"
@@ -1711,7 +1718,12 @@ chk_subflow_nr()
echo "[ ok ]"
fi
- [ "${dump_stats}" = 1 ] && ( ss -N $ns1 -tOni ; ss -N $ns1 -tOni | grep token; ip -n $ns1 mptcp endpoint )
+ if [ "${dump_stats}" = 1 ]; then
+ ss -N $ns1 -tOni
+ ss -N $ns1 -tOni | grep token
+ ip -n $ns1 mptcp endpoint
+ dump_stats
+ fi
}
chk_link_usage()
@@ -3049,7 +3061,7 @@ endpoint_tests()
pm_nl_set_limits $ns1 2 2
pm_nl_set_limits $ns2 2 2
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
- run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow &
+ run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow 2>/dev/null &
wait_mpj $ns1
pm_nl_check_endpoint 1 "creation" \
@@ -3062,14 +3074,14 @@ endpoint_tests()
pm_nl_add_endpoint $ns2 10.0.2.2 flags signal
pm_nl_check_endpoint 0 "modif is allowed" \
$ns2 10.0.2.2 id 1 flags signal
- wait
+ kill_tests_wait
fi
if reset "delete and re-add"; then
pm_nl_set_limits $ns1 1 1
pm_nl_set_limits $ns2 1 1
pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
- run_tests $ns1 $ns2 10.0.1.1 4 0 0 slow &
+ run_tests $ns1 $ns2 10.0.1.1 4 0 0 speed_20 2>/dev/null &
wait_mpj $ns2
pm_nl_del_endpoint $ns2 2 10.0.2.2
@@ -3079,7 +3091,7 @@ endpoint_tests()
pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow
wait_mpj $ns2
chk_subflow_nr "" "after re-add" 2
- wait
+ kill_tests_wait
fi
}
diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
index a29deb9fa024..ab2d581f28a1 100755
--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -752,6 +752,52 @@ test_subflows()
"$server4_token" > /dev/null 2>&1
}
+test_subflows_v4_v6_mix()
+{
+ # Attempt to add a listener at 10.0.2.1:<subflow-port>
+ ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\
+ $app6_port > /dev/null 2>&1 &
+ local listener_pid=$!
+
+ # ADD_ADDR4 from server to client machine reusing the subflow port on
+ # the established v6 connection
+ :>"$client_evts"
+ ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server6_token" id\
+ $server_addr_id dev ns1eth2 > /dev/null 2>&1
+ stdbuf -o0 -e0 printf "ADD_ADDR4 id:%d 10.0.2.1 (ns1) => ns2, reuse port\t\t" $server_addr_id
+ sleep 0.5
+ verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "10.0.2.1"\
+ "$server_addr_id" "$app6_port"
+
+ # CREATE_SUBFLOW from client to server machine
+ :>"$client_evts"
+ ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\
+ $app6_port token "$client6_token" > /dev/null 2>&1
+ sleep 0.5
+ verify_subflow_events "$client_evts" "$SUB_ESTABLISHED" "$client6_token"\
+ "$AF_INET" "10.0.2.2" "10.0.2.1" "$app6_port" "23"\
+ "$server_addr_id" "ns2" "ns1"
+
+ # Delete the listener from the server ns, if one was created
+ kill_wait $listener_pid
+
+ sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts")
+
+ # DESTROY_SUBFLOW from client to server machine
+ :>"$client_evts"
+ ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\
+ $app6_port token "$client6_token" > /dev/null 2>&1
+ sleep 0.5
+ verify_subflow_events "$client_evts" "$SUB_CLOSED" "$client6_token" \
+ "$AF_INET" "10.0.2.2" "10.0.2.1" "$app6_port" "23"\
+ "$server_addr_id" "ns2" "ns1"
+
+ # RM_ADDR from server to client machine
+ ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\
+ "$server6_token" > /dev/null 2>&1
+ sleep 0.5
+}
+
test_prio()
{
local count
@@ -861,6 +907,7 @@ make_connection "v6"
test_announce
test_remove
test_subflows
+test_subflows_v4_v6_mix
test_prio
test_listener
diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c
index 7900fa98eccb..ee9a72982705 100644
--- a/tools/testing/selftests/net/nettest.c
+++ b/tools/testing/selftests/net/nettest.c
@@ -87,6 +87,7 @@ struct sock_args {
int use_setsockopt;
int use_freebind;
int use_cmsg;
+ uint8_t dsfield;
const char *dev;
const char *server_dev;
int ifindex;
@@ -580,6 +581,36 @@ static int set_reuseaddr(int sd)
return rc;
}
+static int set_dsfield(int sd, int version, int dsfield)
+{
+ if (!dsfield)
+ return 0;
+
+ switch (version) {
+ case AF_INET:
+ if (setsockopt(sd, SOL_IP, IP_TOS, &dsfield,
+ sizeof(dsfield)) < 0) {
+ log_err_errno("setsockopt(IP_TOS)");
+ return -1;
+ }
+ break;
+
+ case AF_INET6:
+ if (setsockopt(sd, SOL_IPV6, IPV6_TCLASS, &dsfield,
+ sizeof(dsfield)) < 0) {
+ log_err_errno("setsockopt(IPV6_TCLASS)");
+ return -1;
+ }
+ break;
+
+ default:
+ log_error("Invalid address family\n");
+ return -1;
+ }
+
+ return 0;
+}
+
static int str_to_uint(const char *str, int min, int max, unsigned int *value)
{
int number;
@@ -1317,6 +1348,9 @@ static int msock_init(struct sock_args *args, int server)
(char *)&one, sizeof(one)) < 0)
log_err_errno("Setting SO_BROADCAST error");
+ if (set_dsfield(sd, AF_INET, args->dsfield) != 0)
+ goto out_err;
+
if (args->dev && bind_to_device(sd, args->dev) != 0)
goto out_err;
else if (args->use_setsockopt &&
@@ -1445,6 +1479,9 @@ static int lsock_init(struct sock_args *args)
if (set_reuseport(sd) != 0)
goto err;
+ if (set_dsfield(sd, args->version, args->dsfield) != 0)
+ goto err;
+
if (args->dev && bind_to_device(sd, args->dev) != 0)
goto err;
else if (args->use_setsockopt &&
@@ -1658,6 +1695,9 @@ static int connectsock(void *addr, socklen_t alen, struct sock_args *args)
if (set_reuseport(sd) != 0)
goto err;
+ if (set_dsfield(sd, args->version, args->dsfield) != 0)
+ goto err;
+
if (args->dev && bind_to_device(sd, args->dev) != 0)
goto err;
else if (args->use_setsockopt &&
@@ -1862,7 +1902,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args)
return client_status;
}
-#define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SUCi6xL:0:1:2:3:Fbqf"
+#define GETOPT_STR "sr:l:c:Q:p:t:g:P:DRn:M:X:m:d:I:BN:O:SUCi6xL:0:1:2:3:Fbqf"
#define OPT_FORCE_BIND_KEY_IFINDEX 1001
#define OPT_NO_BIND_KEY_IFINDEX 1002
@@ -1893,6 +1933,8 @@ static void print_usage(char *prog)
" -D|R datagram (D) / raw (R) socket (default stream)\n"
" -l addr local address to bind to in server mode\n"
" -c addr local address to bind to in client mode\n"
+ " -Q dsfield DS Field value of the socket (the IP_TOS or\n"
+ " IPV6_TCLASS socket option)\n"
" -x configure XFRM policy on socket\n"
"\n"
" -d dev bind socket to given device name\n"
@@ -1971,6 +2013,13 @@ int main(int argc, char *argv[])
args.has_local_ip = 1;
args.client_local_addr_str = optarg;
break;
+ case 'Q':
+ if (str_to_uint(optarg, 0, 255, &tmp) != 0) {
+ fprintf(stderr, "Invalid DS Field\n");
+ return 1;
+ }
+ args.dsfield = tmp;
+ break;
case 'p':
if (str_to_uint(optarg, 1, 65535, &tmp) != 0) {
fprintf(stderr, "Invalid port\n");
diff --git a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
index 704997ffc244..8c3ac0a72545 100755
--- a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
+++ b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
@@ -293,19 +293,11 @@ setup-vm() {
elif [[ -n $vtype && $vtype == "vnifilterg" ]]; then
# Add per vni group config with 'bridge vni' api
if [ -n "$group" ]; then
- if [ "$family" == "v4" ]; then
- if [ $mcast -eq 1 ]; then
- bridge -netns hv-$hvid vni add dev $vxlandev vni $tid group $group
- else
- bridge -netns hv-$hvid vni add dev $vxlandev vni $tid remote $group
- fi
- else
- if [ $mcast -eq 1 ]; then
- bridge -netns hv-$hvid vni add dev $vxlandev vni $tid group6 $group
- else
- bridge -netns hv-$hvid vni add dev $vxlandev vni $tid remote6 $group
- fi
- fi
+ if [ $mcast -eq 1 ]; then
+ bridge -netns hv-$hvid vni add dev $vxlandev vni $tid group $group
+ else
+ bridge -netns hv-$hvid vni add dev $vxlandev vni $tid remote $group
+ fi
fi
fi
done
diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/net/toeplitz.c
index 90026a27eac0..9ba03164d73a 100644
--- a/tools/testing/selftests/net/toeplitz.c
+++ b/tools/testing/selftests/net/toeplitz.c
@@ -215,7 +215,7 @@ static char *recv_frame(const struct ring_state *ring, char *frame)
}
/* A single TPACKET_V3 block can hold multiple frames */
-static void recv_block(struct ring_state *ring)
+static bool recv_block(struct ring_state *ring)
{
struct tpacket_block_desc *block;
char *frame;
@@ -223,7 +223,7 @@ static void recv_block(struct ring_state *ring)
block = (void *)(ring->mmap + ring->idx * ring_block_sz);
if (!(block->hdr.bh1.block_status & TP_STATUS_USER))
- return;
+ return false;
frame = (char *)block;
frame += block->hdr.bh1.offset_to_first_pkt;
@@ -235,6 +235,8 @@ static void recv_block(struct ring_state *ring)
block->hdr.bh1.block_status = TP_STATUS_KERNEL;
ring->idx = (ring->idx + 1) % ring_block_nr;
+
+ return true;
}
/* simple test: sleep once unconditionally and then process all rings */
@@ -245,7 +247,7 @@ static void process_rings(void)
usleep(1000 * cfg_timeout_msec);
for (i = 0; i < num_cpus; i++)
- recv_block(&rings[i]);
+ do {} while (recv_block(&rings[i]));
fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n",
frames_received - frames_nohash - frames_error,
@@ -257,12 +259,12 @@ static char *setup_ring(int fd)
struct tpacket_req3 req3 = {0};
void *ring;
- req3.tp_retire_blk_tov = cfg_timeout_msec;
+ req3.tp_retire_blk_tov = cfg_timeout_msec / 8;
req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
req3.tp_frame_size = 2048;
req3.tp_frame_nr = 1 << 10;
- req3.tp_block_nr = 2;
+ req3.tp_block_nr = 16;
req3.tp_block_size = req3.tp_frame_size * req3.tp_frame_nr;
req3.tp_block_size /= req3.tp_block_nr;
diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh
index dc932fd65363..640bc43452fa 100755
--- a/tools/testing/selftests/net/udpgso_bench.sh
+++ b/tools/testing/selftests/net/udpgso_bench.sh
@@ -7,6 +7,7 @@ readonly GREEN='\033[0;92m'
readonly YELLOW='\033[0;33m'
readonly RED='\033[0;31m'
readonly NC='\033[0m' # No Color
+readonly TESTPORT=8000
readonly KSFT_PASS=0
readonly KSFT_FAIL=1
@@ -56,11 +57,26 @@ trap wake_children EXIT
run_one() {
local -r args=$@
+ local nr_socks=0
+ local i=0
+ local -r timeout=10
+
+ ./udpgso_bench_rx -p "$TESTPORT" &
+ ./udpgso_bench_rx -p "$TESTPORT" -t &
+
+ # Wait for the above test program to get ready to receive connections.
+ while [ "$i" -lt "$timeout" ]; do
+ nr_socks="$(ss -lnHi | grep -c "\*:${TESTPORT}")"
+ [ "$nr_socks" -eq 2 ] && break
+ i=$((i + 1))
+ sleep 1
+ done
+ if [ "$nr_socks" -ne 2 ]; then
+ echo "timed out while waiting for udpgso_bench_rx"
+ exit 1
+ fi
- ./udpgso_bench_rx &
- ./udpgso_bench_rx -t &
-
- ./udpgso_bench_tx ${args}
+ ./udpgso_bench_tx -p "$TESTPORT" ${args}
}
run_in_netns() {
diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c
index 6a193425c367..4058c7451e70 100644
--- a/tools/testing/selftests/net/udpgso_bench_rx.c
+++ b/tools/testing/selftests/net/udpgso_bench_rx.c
@@ -250,7 +250,7 @@ static int recv_msg(int fd, char *buf, int len, int *gso_size)
static void do_flush_udp(int fd)
{
static char rbuf[ETH_MAX_MTU];
- int ret, len, gso_size, budget = 256;
+ int ret, len, gso_size = 0, budget = 256;
len = cfg_read_all ? sizeof(rbuf) : 0;
while (budget--) {
@@ -336,6 +336,8 @@ static void parse_opts(int argc, char **argv)
cfg_verify = true;
cfg_read_all = true;
break;
+ default:
+ exit(1);
}
}
diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c
index f1fdaa270291..477392715a9a 100644
--- a/tools/testing/selftests/net/udpgso_bench_tx.c
+++ b/tools/testing/selftests/net/udpgso_bench_tx.c
@@ -62,6 +62,7 @@ static int cfg_payload_len = (1472 * 42);
static int cfg_port = 8000;
static int cfg_runtime_ms = -1;
static bool cfg_poll;
+static int cfg_poll_loop_timeout_ms = 2000;
static bool cfg_segment;
static bool cfg_sendmmsg;
static bool cfg_tcp;
@@ -235,16 +236,17 @@ static void flush_errqueue_recv(int fd)
}
}
-static void flush_errqueue(int fd, const bool do_poll)
+static void flush_errqueue(int fd, const bool do_poll,
+ unsigned long poll_timeout, const bool poll_err)
{
if (do_poll) {
struct pollfd fds = {0};
int ret;
fds.fd = fd;
- ret = poll(&fds, 1, 500);
+ ret = poll(&fds, 1, poll_timeout);
if (ret == 0) {
- if (cfg_verbose)
+ if ((cfg_verbose) && (poll_err))
fprintf(stderr, "poll timeout\n");
} else if (ret < 0) {
error(1, errno, "poll");
@@ -254,6 +256,20 @@ static void flush_errqueue(int fd, const bool do_poll)
flush_errqueue_recv(fd);
}
+static void flush_errqueue_retry(int fd, unsigned long num_sends)
+{
+ unsigned long tnow, tstop;
+ bool first_try = true;
+
+ tnow = gettimeofday_ms();
+ tstop = tnow + cfg_poll_loop_timeout_ms;
+ do {
+ flush_errqueue(fd, true, tstop - tnow, first_try);
+ first_try = false;
+ tnow = gettimeofday_ms();
+ } while ((stat_zcopies != num_sends) && (tnow < tstop));
+}
+
static int send_tcp(int fd, char *data)
{
int ret, done = 0, count = 0;
@@ -413,7 +429,8 @@ static int send_udp_segment(int fd, char *data)
static void usage(const char *filepath)
{
- error(1, 0, "Usage: %s [-46acmHPtTuvz] [-C cpu] [-D dst ip] [-l secs] [-M messagenr] [-p port] [-s sendsize] [-S gsosize]",
+ error(1, 0, "Usage: %s [-46acmHPtTuvz] [-C cpu] [-D dst ip] [-l secs] "
+ "[-L secs] [-M messagenr] [-p port] [-s sendsize] [-S gsosize]",
filepath);
}
@@ -423,7 +440,7 @@ static void parse_opts(int argc, char **argv)
int max_len, hdrlen;
int c;
- while ((c = getopt(argc, argv, "46acC:D:Hl:mM:p:s:PS:tTuvz")) != -1) {
+ while ((c = getopt(argc, argv, "46acC:D:Hl:L:mM:p:s:PS:tTuvz")) != -1) {
switch (c) {
case '4':
if (cfg_family != PF_UNSPEC)
@@ -452,6 +469,9 @@ static void parse_opts(int argc, char **argv)
case 'l':
cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000;
break;
+ case 'L':
+ cfg_poll_loop_timeout_ms = strtoul(optarg, NULL, 10) * 1000;
+ break;
case 'm':
cfg_sendmmsg = true;
break;
@@ -490,6 +510,8 @@ static void parse_opts(int argc, char **argv)
case 'z':
cfg_zerocopy = true;
break;
+ default:
+ exit(1);
}
}
@@ -677,7 +699,7 @@ int main(int argc, char **argv)
num_sends += send_udp(fd, buf[i]);
num_msgs++;
if ((cfg_zerocopy && ((num_msgs & 0xF) == 0)) || cfg_tx_tstamp)
- flush_errqueue(fd, cfg_poll);
+ flush_errqueue(fd, cfg_poll, 500, true);
if (cfg_msg_nr && num_msgs >= cfg_msg_nr)
break;
@@ -696,7 +718,7 @@ int main(int argc, char **argv)
} while (!interrupted && (cfg_runtime_ms == -1 || tnow < tstop));
if (cfg_zerocopy || cfg_tx_tstamp)
- flush_errqueue(fd, true);
+ flush_errqueue_retry(fd, num_sends);
if (close(fd))
error(1, errno, "close");
diff --git a/tools/testing/selftests/netfilter/nft_trans_stress.sh b/tools/testing/selftests/netfilter/nft_trans_stress.sh
index a7f62ad4f661..2ffba45a78bf 100755
--- a/tools/testing/selftests/netfilter/nft_trans_stress.sh
+++ b/tools/testing/selftests/netfilter/nft_trans_stress.sh
@@ -10,12 +10,20 @@
ksft_skip=4
testns=testns-$(mktemp -u "XXXXXXXX")
+tmp=""
tables="foo bar baz quux"
global_ret=0
eret=0
lret=0
+cleanup() {
+ ip netns pids "$testns" | xargs kill 2>/dev/null
+ ip netns del "$testns"
+
+ rm -f "$tmp"
+}
+
check_result()
{
local r=$1
@@ -43,6 +51,7 @@ if [ $? -ne 0 ];then
exit $ksft_skip
fi
+trap cleanup EXIT
tmp=$(mktemp)
for table in $tables; do
@@ -139,11 +148,4 @@ done
check_result $lret "add/delete with nftrace enabled"
-pkill -9 ping
-
-wait
-
-rm -f "$tmp"
-ip netns del "$testns"
-
exit $global_ret
diff --git a/tools/testing/selftests/netfilter/settings b/tools/testing/selftests/netfilter/settings
new file mode 100644
index 000000000000..6091b45d226b
--- /dev/null
+++ b/tools/testing/selftests/netfilter/settings
@@ -0,0 +1 @@
+timeout=120
diff --git a/tools/testing/selftests/proc/proc-empty-vm.c b/tools/testing/selftests/proc/proc-empty-vm.c
index d95b1cb43d9d..7588428b8fcd 100644
--- a/tools/testing/selftests/proc/proc-empty-vm.c
+++ b/tools/testing/selftests/proc/proc-empty-vm.c
@@ -25,6 +25,7 @@
#undef NDEBUG
#include <assert.h>
#include <errno.h>
+#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -41,7 +42,7 @@
* 1: vsyscall VMA is --xp vsyscall=xonly
* 2: vsyscall VMA is r-xp vsyscall=emulate
*/
-static int g_vsyscall;
+static volatile int g_vsyscall;
static const char *g_proc_pid_maps_vsyscall;
static const char *g_proc_pid_smaps_vsyscall;
@@ -147,11 +148,12 @@ static void vsyscall(void)
g_vsyscall = 0;
/* gettimeofday(NULL, NULL); */
+ uint64_t rax = 0xffffffffff600000;
asm volatile (
- "call %P0"
- :
- : "i" (0xffffffffff600000), "D" (NULL), "S" (NULL)
- : "rax", "rcx", "r11"
+ "call *%[rax]"
+ : [rax] "+a" (rax)
+ : "D" (NULL), "S" (NULL)
+ : "rcx", "r11"
);
g_vsyscall = 1;
diff --git a/tools/testing/selftests/proc/proc-pid-vm.c b/tools/testing/selftests/proc/proc-pid-vm.c
index 69551bfa215c..cacbd2a4aec9 100644
--- a/tools/testing/selftests/proc/proc-pid-vm.c
+++ b/tools/testing/selftests/proc/proc-pid-vm.c
@@ -257,11 +257,12 @@ static void vsyscall(void)
g_vsyscall = 0;
/* gettimeofday(NULL, NULL); */
+ uint64_t rax = 0xffffffffff600000;
asm volatile (
- "call %P0"
- :
- : "i" (0xffffffffff600000), "D" (NULL), "S" (NULL)
- : "rax", "rcx", "r11"
+ "call *%[rax]"
+ : [rax] "+a" (rax)
+ : "D" (NULL), "S" (NULL)
+ : "rcx", "r11"
);
g_vsyscall = 1;
diff --git a/tools/testing/selftests/vm/hugetlb-madvise.c b/tools/testing/selftests/vm/hugetlb-madvise.c
index a634f47d1e56..9a127a8fe176 100644
--- a/tools/testing/selftests/vm/hugetlb-madvise.c
+++ b/tools/testing/selftests/vm/hugetlb-madvise.c
@@ -17,7 +17,6 @@
#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>
-#define __USE_GNU
#include <fcntl.h>
#define MIN_FREE_PAGES 20
diff --git a/tools/virtio/linux/bug.h b/tools/virtio/linux/bug.h
index 813baf13f62a..51a919083d9b 100644
--- a/tools/virtio/linux/bug.h
+++ b/tools/virtio/linux/bug.h
@@ -1,13 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef BUG_H
-#define BUG_H
+#ifndef _LINUX_BUG_H
+#define _LINUX_BUG_H
#include <asm/bug.h>
#define BUG_ON(__BUG_ON_cond) assert(!(__BUG_ON_cond))
-#define BUILD_BUG_ON(x)
-
#define BUG() abort()
-#endif /* BUG_H */
+#endif /* _LINUX_BUG_H */
diff --git a/tools/virtio/linux/build_bug.h b/tools/virtio/linux/build_bug.h
new file mode 100644
index 000000000000..cdbb75e28a60
--- /dev/null
+++ b/tools/virtio/linux/build_bug.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_BUILD_BUG_H
+#define _LINUX_BUILD_BUG_H
+
+#define BUILD_BUG_ON(x)
+
+#endif /* _LINUX_BUILD_BUG_H */
diff --git a/tools/virtio/linux/cpumask.h b/tools/virtio/linux/cpumask.h
new file mode 100644
index 000000000000..307da69d6b26
--- /dev/null
+++ b/tools/virtio/linux/cpumask.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_CPUMASK_H
+#define _LINUX_CPUMASK_H
+
+#include <linux/kernel.h>
+
+#endif /* _LINUX_CPUMASK_H */
diff --git a/tools/virtio/linux/gfp.h b/tools/virtio/linux/gfp.h
new file mode 100644
index 000000000000..43d146f236f1
--- /dev/null
+++ b/tools/virtio/linux/gfp.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_GFP_H
+#define __LINUX_GFP_H
+
+#include <linux/topology.h>
+
+#endif
diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h
index 21593bf97755..8b877167933d 100644
--- a/tools/virtio/linux/kernel.h
+++ b/tools/virtio/linux/kernel.h
@@ -10,6 +10,7 @@
#include <stdarg.h>
#include <linux/compiler.h>
+#include <linux/log2.h>
#include <linux/types.h>
#include <linux/overflow.h>
#include <linux/list.h>
diff --git a/tools/virtio/linux/kmsan.h b/tools/virtio/linux/kmsan.h
new file mode 100644
index 000000000000..272b5aa285d5
--- /dev/null
+++ b/tools/virtio/linux/kmsan.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_KMSAN_H
+#define _LINUX_KMSAN_H
+
+#include <linux/gfp.h>
+
+inline void kmsan_handle_dma(struct page *page, size_t offset, size_t size,
+ enum dma_data_direction dir)
+{
+}
+
+#endif /* _LINUX_KMSAN_H */
diff --git a/tools/virtio/linux/scatterlist.h b/tools/virtio/linux/scatterlist.h
index 369ee308b668..74d9e1825748 100644
--- a/tools/virtio/linux/scatterlist.h
+++ b/tools/virtio/linux/scatterlist.h
@@ -2,6 +2,7 @@
#ifndef SCATTERLIST_H
#define SCATTERLIST_H
#include <linux/kernel.h>
+#include <linux/bug.h>
struct scatterlist {
unsigned long page_link;
diff --git a/tools/virtio/linux/topology.h b/tools/virtio/linux/topology.h
new file mode 100644
index 000000000000..910794afb993
--- /dev/null
+++ b/tools/virtio/linux/topology.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_TOPOLOGY_H
+#define _LINUX_TOPOLOGY_H
+
+#include <linux/cpumask.h>
+
+#endif /* _LINUX_TOPOLOGY_H */
diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h
index 6d1fccd3d86c..b68920d52750 100644
--- a/tools/virtio/ringtest/main.h
+++ b/tools/virtio/ringtest/main.h
@@ -140,25 +140,19 @@ static inline void busy_wait(void)
#define smp_wmb() smp_release()
#endif
-#ifdef __alpha__
-#define smp_read_barrier_depends() smp_acquire()
-#else
-#define smp_read_barrier_depends() do {} while(0)
-#endif
-
static __always_inline
void __read_once_size(const volatile void *p, void *res, int size)
{
- switch (size) { \
- case 1: *(unsigned char *)res = *(volatile unsigned char *)p; break; \
- case 2: *(unsigned short *)res = *(volatile unsigned short *)p; break; \
- case 4: *(unsigned int *)res = *(volatile unsigned int *)p; break; \
- case 8: *(unsigned long long *)res = *(volatile unsigned long long *)p; break; \
- default: \
- barrier(); \
- __builtin_memcpy((void *)res, (const void *)p, size); \
- barrier(); \
- } \
+ switch (size) {
+ case 1: *(unsigned char *)res = *(volatile unsigned char *)p; break;
+ case 2: *(unsigned short *)res = *(volatile unsigned short *)p; break;
+ case 4: *(unsigned int *)res = *(volatile unsigned int *)p; break;
+ case 8: *(unsigned long long *)res = *(volatile unsigned long long *)p; break;
+ default:
+ barrier();
+ __builtin_memcpy((void *)res, (const void *)p, size);
+ barrier();
+ }
}
static __always_inline void __write_once_size(volatile void *p, void *res, int size)
@@ -175,13 +169,22 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
}
}
+#ifdef __alpha__
#define READ_ONCE(x) \
({ \
union { typeof(x) __val; char __c[1]; } __u; \
__read_once_size(&(x), __u.__c, sizeof(x)); \
- smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \
+ smp_mb(); /* Enforce dependency ordering from x */ \
+ __u.__val; \
+})
+#else
+#define READ_ONCE(x) \
+({ \
+ union { typeof(x) __val; char __c[1]; } __u; \
+ __read_once_size(&(x), __u.__c, sizeof(x)); \
__u.__val; \
})
+#endif
#define WRITE_ONCE(x, val) \
({ \
diff --git a/tools/virtio/virtio-trace/trace-agent-ctl.c b/tools/virtio/virtio-trace/trace-agent-ctl.c
index 73d253d4b559..39860be6e2d8 100644
--- a/tools/virtio/virtio-trace/trace-agent-ctl.c
+++ b/tools/virtio/virtio-trace/trace-agent-ctl.c
@@ -75,7 +75,7 @@ static int wait_order(int ctl_fd)
if (ret)
break;
- };
+ }
return ret;
diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
index 86a410ddcedd..120062f94590 100644
--- a/tools/virtio/virtio_test.c
+++ b/tools/virtio/virtio_test.c
@@ -173,7 +173,7 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq,
long started = 0, completed = 0, next_reset = reset_n;
long completed_before, started_before;
int r, test = 1;
- unsigned len;
+ unsigned int len;
long long spurious = 0;
const bool random_batch = batch == RANDOM_BATCH;
diff --git a/tools/virtio/vringh_test.c b/tools/virtio/vringh_test.c
index fa87b58bd5fa..98ff808d6f0c 100644
--- a/tools/virtio/vringh_test.c
+++ b/tools/virtio/vringh_test.c
@@ -308,6 +308,7 @@ static int parallel_test(u64 features,
gvdev.vdev.features = features;
INIT_LIST_HEAD(&gvdev.vdev.vqs);
+ spin_lock_init(&gvdev.vdev.vqs_list_lock);
gvdev.to_host_fd = to_host[1];
gvdev.notifies = 0;
@@ -455,6 +456,7 @@ int main(int argc, char *argv[])
getrange = getrange_iov;
vdev.features = 0;
INIT_LIST_HEAD(&vdev.vqs);
+ spin_lock_init(&vdev.vqs_list_lock);
while (argv[1]) {
if (strcmp(argv[1], "--indirect") == 0)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 13e88297f999..9c60384b5ae0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3954,6 +3954,13 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
}
mutex_lock(&kvm->lock);
+
+#ifdef CONFIG_LOCKDEP
+ /* Ensure that lockdep knows vcpu->mutex is taken *inside* kvm->lock */
+ mutex_lock(&vcpu->mutex);
+ mutex_unlock(&vcpu->mutex);
+#endif
+
if (kvm_get_vcpu_by_id(kvm, id)) {
r = -EEXIST;
goto unlock_vcpu_destroy;
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index 495ceabffe88..9584eb57e0ed 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -336,7 +336,7 @@ static int kvm_vfio_has_attr(struct kvm_device *dev,
return -ENXIO;
}
-static void kvm_vfio_destroy(struct kvm_device *dev)
+static void kvm_vfio_release(struct kvm_device *dev)
{
struct kvm_vfio *kv = dev->private;
struct kvm_vfio_group *kvg, *tmp;
@@ -355,7 +355,7 @@ static void kvm_vfio_destroy(struct kvm_device *dev)
kvm_vfio_update_coherency(dev);
kfree(kv);
- kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */
+ kfree(dev); /* alloc by kvm_ioctl_create_device, free by .release */
}
static int kvm_vfio_create(struct kvm_device *dev, u32 type);
@@ -363,7 +363,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type);
static struct kvm_device_ops kvm_vfio_ops = {
.name = "kvm-vfio",
.create = kvm_vfio_create,
- .destroy = kvm_vfio_destroy,
+ .release = kvm_vfio_release,
.set_attr = kvm_vfio_set_attr,
.has_attr = kvm_vfio_has_attr,
};